From 4a337904e627fd4e9c38a70cb17d38b13ff69fcd Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 5 Feb 2026 17:30:33 +0530 Subject: [PATCH 1/9] HDDS-14103. Create an option in SCM to ack/ignore missing containers --- .../hdds/scm/container/ContainerInfo.java | 32 ++++- .../hadoop/hdds/scm/client/ScmClient.java | 15 +++ .../StorageContainerLocationProtocol.java | 16 +++ ...ocationProtocolClientSideTranslatorPB.java | 20 +++ .../src/main/proto/ScmAdminProtocol.proto | 20 +++ .../src/main/proto/hdds.proto | 1 + .../hdds/scm/container/ContainerManager.java | 11 ++ .../scm/container/ContainerManagerImpl.java | 15 +++ .../scm/container/ContainerStateManager.java | 10 ++ .../container/ContainerStateManagerImpl.java | 17 +++ .../replication/ReplicationManager.java | 2 + .../AcknowledgedMissingContainerHandler.java | 57 +++++++++ .../health/ClosingContainerHandler.java | 5 +- ...ocationProtocolServerSideTranslatorPB.java | 29 +++++ .../scm/server/SCMClientProtocolServer.java | 51 ++++++++ .../apache/hadoop/ozone/audit/SCMAction.java | 4 +- .../health/TestClosingContainerHandler.java | 2 +- .../scm/cli/ContainerOperationClient.java | 10 ++ .../cli/container/AckMissingSubcommand.java | 97 ++++++++++++++ .../scm/cli/container/ContainerCommands.java | 4 +- .../cli/container/UnackMissingSubcommand.java | 74 +++++++++++ .../TestAckMissingContainerSubcommand.java | 119 ++++++++++++++++++ 22 files changed, 604 insertions(+), 7 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java create mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java create mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java create mode 100644 hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 2beef2abf885..b53ae71b7ddb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -87,6 +87,7 @@ public final class ContainerInfo implements Comparable { private long sequenceId; // Health state of the container (determined by ReplicationManager) private ContainerHealthState healthState; + private boolean ackMissing; private ContainerInfo(Builder b) { containerID = ContainerID.valueOf(b.containerID); @@ -102,6 +103,7 @@ private ContainerInfo(Builder b) { replicationConfig = b.replicationConfig; clock = b.clock; healthState = b.healthState != null ? b.healthState : ContainerHealthState.HEALTHY; + ackMissing = b.ackMissing; } public static Codec getCodec() { @@ -121,7 +123,8 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setContainerID(info.getContainerID()) .setDeleteTransactionId(info.getDeleteTransactionId()) .setReplicationConfig(config) - .setSequenceId(info.getSequenceId()); + .setSequenceId(info.getSequenceId()) + .setAckMissing(info.getAckMissing()); if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); @@ -263,6 +266,24 @@ public void setHealthState(ContainerHealthState newHealthState) { this.healthState = newHealthState; } + /** + * Check if container is acked as missing. + * + * @return boolean + */ + public boolean getAckMissing() { + return ackMissing; + } + + /** + * Set the boolean for ackMissing. 
+ * + * @param isAckMissing checks if container is acked as missing or not + */ + public void setAckMissing(boolean isAckMissing) { + this.ackMissing = isAckMissing; + } + @JsonIgnore public HddsProtos.ContainerInfoProto getProtobuf() { HddsProtos.ContainerInfoProto.Builder builder = @@ -275,7 +296,8 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setDeleteTransactionId(getDeleteTransactionId()) .setOwner(getOwner()) .setSequenceId(getSequenceId()) - .setReplicationType(getReplicationType()); + .setReplicationType(getReplicationType()) + .setAckMissing(getAckMissing()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) @@ -393,6 +415,7 @@ public static class Builder { private PipelineID pipelineID; private ReplicationConfig replicationConfig; private ContainerHealthState healthState; + private boolean ackMissing; public Builder setPipelineID(PipelineID pipelineId) { this.pipelineID = pipelineId; @@ -450,6 +473,11 @@ public Builder setHealthState(ContainerHealthState healthState) { return this; } + public Builder setAckMissing(boolean ackMissing) { + this.ackMissing = ackMissing; + return this; + } + /** * Also resets {@code stateEnterTime}, so make sure to set clock first. */ diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 892dd4de1ff8..716bfadebf2f 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -463,4 +463,19 @@ DecommissionScmResponseProto decommissionScm( */ void reconcileContainer(long containerID) throws IOException; + /** + * Acknowledge the missing container. + * + * @param containerId The ID of the container to acknowledge as missing. 
+ * @throws IOException + */ + void acknowledgeMissingContainer(long containerId) throws IOException; + + /** + * Unacknowledge the missing container. + * + * @param containerId The ID of the container to unacknowledge as missing. + * @throws IOException + */ + void unacknowledgeMissingContainer(long containerId) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 92ddfa7eb8dc..d2ede073a4ee 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -511,4 +511,20 @@ DecommissionScmResponseProto decommissionScm( * @throws IOException On error */ void reconcileContainer(long containerID) throws IOException; + + /** + * Acknowledge the missing container. + * + * @param containerId The ID of the container to acknowledge as missing. + * @throws IOException + */ + void acknowledgeMissingContainer(long containerId) throws IOException; + + /** + * Unacknowledge the missing container. + * + * @param containerId The ID of the container to unacknowledge as missing. 
+ * @throws IOException + */ + void unacknowledgeMissingContainer(long containerId) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 94b2230e68ba..a5410bfaff22 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto; @@ -125,6 +126,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import 
org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -1240,4 +1242,22 @@ public void reconcileContainer(long containerID) throws IOException { // TODO check error handling. submitRequest(Type.ReconcileContainer, builder -> builder.setReconcileContainerRequest(request)); } + + @Override + public void acknowledgeMissingContainer(long containerID) throws IOException { + AcknowledgeMissingContainerRequestProto request = AcknowledgeMissingContainerRequestProto.newBuilder() + .setContainerID(containerID) + .build(); + submitRequest(Type.AcknowledgeMissingContainer, + builder -> builder.setAcknowledgeMissingContainerRequest(request)); + } + + @Override + public void unacknowledgeMissingContainer(long containerID) throws IOException { + UnacknowledgeMissingContainerRequestProto request = UnacknowledgeMissingContainerRequestProto.newBuilder() + .setContainerID(containerID) + .build(); + submitRequest(Type.UnacknowledgeMissingContainer, + builder -> builder.setUnacknowledgeMissingContainerRequest(request)); + } } diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index f80a50a3be97..8bcfa627df74 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -87,6 +87,8 @@ message ScmContainerLocationRequest { optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; + optional AcknowledgeMissingContainerRequestProto acknowledgeMissingContainerRequest = 51; + optional UnacknowledgeMissingContainerRequestProto unacknowledgeMissingContainerRequest = 52; } message ScmContainerLocationResponse { @@ -145,6 +147,8 @@ message ScmContainerLocationResponse { optional 
ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; optional GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; + optional AcknowledgeMissingContainerResponseProto acknowledgeMissingContainerResponse = 51; + optional UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainerResponse = 52; enum Status { OK = 1; @@ -202,6 +206,8 @@ enum Type { GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; GetDeletedBlocksTransactionSummary = 46; + AcknowledgeMissingContainer = 47; + UnacknowledgeMissingContainer = 48; } /** @@ -695,6 +701,20 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } +message AcknowledgeMissingContainerRequestProto { + required int64 containerID = 1; +} + +message AcknowledgeMissingContainerResponseProto { +} + +message UnacknowledgeMissingContainerRequestProto { + required int64 containerID = 1; +} + +message UnacknowledgeMissingContainerResponseProto { +} + /** * Protocol used from an HDFS node to StorageContainerManager. See the request * and response messages for details of the RPC calls. 
diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index eb819b80a3e8..b95569e3bac2 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,6 +271,7 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; + optional bool ackMissing = 13; } message ContainerWithPipeline { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java index 370c219ac601..295f2e152814 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManager.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ContainerInfoProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; @@ -222,4 +223,14 @@ void deleteContainer(ContainerID containerID) * @return containerStateManger */ ContainerStateManager getContainerStateManager(); + + /** + * Update container info in the container manager. + * This is used for updating container metadata like ackMissing flag. 
+ * + * @param containerInfo Updated container info proto + * @throws IOException + */ + void updateContainerInfo(ContainerID containerID, ContainerInfoProto containerInfo) + throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java index dc701a0be661..64ac028609a1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java @@ -294,6 +294,21 @@ public void updateContainerState(final ContainerID cid, } } + @Override + public void updateContainerInfo(final ContainerID cid, ContainerInfoProto containerInfo) + throws IOException { + lock.lock(); + try { + if (containerExist(cid)) { + containerStateManager.updateContainerInfo(containerInfo); + } else { + throw new ContainerNotFoundException(cid); + } + } finally { + lock.unlock(); + } + } + @Override public void transitionDeletingOrDeletedToClosedState(ContainerID containerID) throws IOException { HddsProtos.ContainerID proto = containerID.getProtobuf(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index f5a2334b7cd2..4ce28fa40fb2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -210,4 +210,14 @@ void removeContainer(HddsProtos.ContainerID containerInfo) */ void reinitialize(Table containerStore) throws IOException; + + /** + * Update container info. 
+ * + * @param containerInfo Updated container info proto + * @throws IOException + */ + @Replicate + void updateContainerInfo(HddsProtos.ContainerInfoProto containerInfo) + throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index d971b19c406c..3c5de01ddd38 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -551,6 +551,23 @@ public void reinitialize( } } + @Override + public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) + throws IOException { + ContainerInfo updatedInfo = ContainerInfo.fromProtobuf(updatedInfoProto); + ContainerID containerID = updatedInfo.containerID(); + + try (AutoCloseableLock ignored = writeLock(containerID)) { + final ContainerInfo currentInfo = containers.getContainerInfo(containerID); + if (currentInfo == null) { + throw new ContainerNotFoundException(containerID); + } + currentInfo.setAckMissing(updatedInfo.getAckMissing()); + transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); + LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.getAckMissing()); + } + } + private AutoCloseableLock readLock() { return AutoCloseableLock.acquire(lock.readLock()); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 83d3825b66c0..35c84c299bc4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.container.replication.health.AcknowledgedMissingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler; @@ -269,6 +270,7 @@ public ReplicationManager(final ReplicationManagerConfiguration rmConf, .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) + .addNext(new AcknowledgedMissingContainerHandler()) .addNext(new QuasiClosedStuckReplicationCheck()) .addNext(ecReplicationCheckHandler) .addNext(ratisReplicationCheckHandler) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java new file mode 100644 index 000000000000..3fe36e4b1ad4 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication.health; + +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class used in Replication Manager to skip containers that have been + * acknowledged as missing. These containers will still be marked as + * MISSING in the health state but will not trigger replication. 
+ */ +public class AcknowledgedMissingContainerHandler extends AbstractCheck { + + private static final Logger LOG = LoggerFactory.getLogger(AcknowledgedMissingContainerHandler.class); + + @Override + public boolean handle(ContainerCheckRequest request) { + ContainerInfo containerInfo = request.getContainerInfo(); + ContainerID containerID = containerInfo.containerID(); + LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", + containerID, containerInfo.getAckMissing()); + + if (!containerInfo.getAckMissing()) { + LOG.debug("Container {} is not acknowledged ", containerID); + return false; + } + LOG.debug("Container {} has been acknowledged as missing.", containerID); + + if (request.getContainerReplicas().isEmpty()) { + LOG.debug("Acknowledged missing container {} confirmed to have no replicas.", containerID); + } else { + LOG.warn("Container {} was acknowledged as missing but now has {} replicas. " + + "The container may have been recovered. Consider un-acknowledging it.", + containerID, request.getContainerReplicas().size()); + } + return true; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index a09f5079ffe5..fc3cf27895f4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -68,8 +68,9 @@ public boolean handle(ContainerCheckRequest request) { boolean forceClose = containerInfo.getReplicationConfig() .getReplicationType() != ReplicationType.RATIS; - // TODO - review this logic - may need an empty check here - if (request.getContainerReplicas().isEmpty()) { + // Don't report MISSING if container is 
acknowledged or empty (will be handled by other handlers) + if (request.getContainerReplicas().isEmpty() && !containerInfo.getAckMissing() && + containerInfo.getNumberOfKeys() > 0) { request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 62f06079bf0b..d069bb49c432 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -134,6 +136,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; 
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -748,6 +752,20 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); + case AcknowledgeMissingContainer: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setAcknowledgeMissingContainerResponse( + acknowledgeMissingContainer(request.getAcknowledgeMissingContainerRequest())) + .build(); + case UnacknowledgeMissingContainer: + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setUnacknowledgeMissingContainerResponse( + unacknowledgeMissingContainer(request.getUnacknowledgeMissingContainerRequest())) + .build(); default: throw new IllegalArgumentException( "Unknown command type: " + request.getCmdType()); @@ -1387,4 +1405,15 @@ public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequ return ReconcileContainerResponseProto.getDefaultInstance(); } + public AcknowledgeMissingContainerResponseProto acknowledgeMissingContainer( + AcknowledgeMissingContainerRequestProto request) throws IOException { + impl.acknowledgeMissingContainer(request.getContainerID()); + return AcknowledgeMissingContainerResponseProto.getDefaultInstance(); + } + + public UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainer( + UnacknowledgeMissingContainerRequestProto request) throws 
IOException { + impl.unacknowledgeMissingContainer(request.getContainerID()); + return UnacknowledgeMissingContainerResponseProto.getDefaultInstance(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 5726d3449a7d..28e9a6a1ef65 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1680,4 +1680,55 @@ public void reconcileContainer(long longContainerID) throws IOException { throw ex; } } + + @Override + public void acknowledgeMissingContainer(long longContainerID) throws IOException { + ContainerID containerID = ContainerID.valueOf(longContainerID); + final Map auditMap = new HashMap<>(); + auditMap.put("containerID", containerID.toString()); + + try { + ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); + Set replicas = scm.getContainerManager().getContainerReplicas(containerID); + if (replicas != null && !replicas.isEmpty()) { + throw new IOException("Container " + longContainerID + + " has " + replicas.size() + " replicas and cannot be acknowledged as missing"); + } + + if (containerInfo.getNumberOfKeys() == 0) { + throw new IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + } + + HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() + .setAckMissing(true) + .build(); + scm.getContainerManager().updateContainerInfo(containerID, updatedProto); + + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } catch (IOException ex) { + AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + throw ex; + } + } + + @Override 
+ public void unacknowledgeMissingContainer(long longContainerID) throws IOException { + ContainerID containerID = ContainerID.valueOf(longContainerID); + final Map auditMap = new HashMap<>(); + auditMap.put("containerID", containerID.toString()); + + try { + ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); + + HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() + .setAckMissing(false) + .build(); + scm.getContainerManager().updateContainerInfo(containerID, updatedProto); + + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } catch (IOException ex) { + AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + throw ex; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index 52cd943c4dbb..e3f0be24a4f6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -68,7 +68,9 @@ public enum SCMAction implements AuditAction { QUERY_NODE, GET_PIPELINE, RECONCILE_CONTAINER, - GET_DELETED_BLOCK_SUMMARY; + GET_DELETED_BLOCK_SUMMARY, + ACKNOWLEDGE_MISSING_CONTAINER, + UNACKNOWLEDGE_MISSING_CONTAINER; @Override public String getAction() { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java index dca89171f0e3..d75b2d982514 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java @@ -213,7 +213,7 @@ public void testClosingContainerStateIsNotUpdatedWhenThereAreReplicas() { @Test public void testClosingContainerStateIsUpdatedWhenThereAreNotReplicas() { ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( - RATIS_REPLICATION_CONFIG, 1, CLOSING); + RATIS_REPLICATION_CONFIG, 1, CLOSING, 1, 10); Set containerReplicas = new HashSet<>(); ReplicationManagerReport report = new ReplicationManagerReport(rmConf.getContainerSampleLimit()); ContainerCheckRequest request = new ContainerCheckRequest.Builder() diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 61c0f4150c34..b3e8f24c3982 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -598,4 +598,14 @@ public String getMetrics(String query) throws IOException { public void reconcileContainer(long id) throws IOException { storageContainerLocationClient.reconcileContainer(id); } + + @Override + public void acknowledgeMissingContainer(long containerId) throws IOException { + storageContainerLocationClient.acknowledgeMissingContainer(containerId); + } + + @Override + public void unacknowledgeMissingContainer(long containerId) throws IOException { + storageContainerLocationClient.unacknowledgeMissingContainer(containerId); + } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java new file mode 100644 index 000000000000..e9c8a6ce4910 --- /dev/null +++ 
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerListResult; +import picocli.CommandLine; + +/** + * Acknowledge missing container(s) to suppress them from Replication Manager Report. 
+ */ +@CommandLine.Command( + name = "ack", + description = "Acknowledge missing container(s) to suppress them from reports", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class AckMissingSubcommand extends ScmSubcommand { + + @CommandLine.Parameters(description = "Container IDs to acknowledge (comma-separated)", + arity = "0..1") + private String containers; + + @CommandLine.Option(names = {"--list"}, + description = "List all acknowledged missing containers") + private boolean list; + + @Override + public void execute(ScmClient scmClient) throws IOException { + if (list) { + // List acknowledged containers + ContainerListResult result = scmClient.listContainer(1, Integer.MAX_VALUE); + for (ContainerInfo info : result.getContainerInfoList()) { + if (info.getAckMissing()) { + out().println(info.getContainerID()); + } + } + } else if (containers != null && !containers.isEmpty()) { + // Acknowledge containers + Set ids = parseContainerIds(containers); + for (Long id : ids) { + try { + int replicaCount = scmClient.getContainerReplicas(id).size(); + if (replicaCount > 0) { + err().println("Cannot acknowledge container " + id + ": has " + replicaCount + " replica(s). " + + "Only containers with 0 replicas can be acknowledged as missing."); + continue; + } + + ContainerInfo containerInfo = scmClient.getContainer(id); + if (containerInfo.getNumberOfKeys() == 0) { + err().println("Cannot acknowledge container " + id + ": container is empty (0 keys). 
" + + "Empty containers are auto-deleted and don't need acknowledgement."); + continue; + } + + scmClient.acknowledgeMissingContainer(id); + out().println("Acknowledged container: " + id); + } catch (IOException e) { + err().println("Failed to acknowledge container " + id + ": " + e.getMessage()); + } + } + } else { + throw new IllegalArgumentException( + "Either provide container IDs or use --list option"); + } + } + + private Set parseContainerIds(String input) { + return Arrays.stream(input.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(Long::parseLong) + .collect(Collectors.toSet()); + } +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java index b340c4077f44..aafe88415951 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java @@ -37,7 +37,9 @@ CloseSubcommand.class, ReportSubcommand.class, UpgradeSubcommand.class, - ReconcileSubcommand.class + ReconcileSubcommand.class, + AckMissingSubcommand.class, + UnackMissingSubcommand.class }) @MetaInfServices(AdminSubcommand.class) public class ContainerCommands implements AdminSubcommand { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java new file mode 100644 index 000000000000..c87509122472 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import picocli.CommandLine; + +/** + * Unacknowledge missing container(s) to report them again. 
+ */ +@CommandLine.Command( + name = "unack", + description = "Unacknowledge missing container(s) to report them again", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class UnackMissingSubcommand extends ScmSubcommand { + + @CommandLine.Parameters(description = "Container IDs to unacknowledge (comma-separated)") + private String containers; + + @Override + public void execute(ScmClient scmClient) throws IOException { + if (containers == null || containers.isEmpty()) { + throw new IllegalArgumentException( + "Container IDs must be provided"); + } + + Set ids = parseContainerIds(containers); + for (Long id : ids) { + try { + ContainerInfo containerInfo = scmClient.getContainer(id); + if (!containerInfo.getAckMissing()) { + err().println("Cannot unacknowledge container " + id + ": " + + "Only acknowledged missing containers can be unacknowledged."); + continue; + } + scmClient.unacknowledgeMissingContainer(id); + out().println("Unacknowledged container: " + id); + } catch (IOException e) { + err().println("Failed to unacknowledge container " + id + ": " + e.getMessage()); + } + } + } + + private Set parseContainerIds(String input) { + return Arrays.stream(input.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(Long::parseLong) + .collect(Collectors.toSet()); + } +} diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java new file mode 100644 index 000000000000..b625cda61f9c --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerListResult; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import picocli.CommandLine; + +/** + * Tests for AckMissingContainerSubcommand. 
+ */ +public class TestAckMissingContainerSubcommand { + + private ScmClient scmClient; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + @BeforeEach + public void setup() throws IOException { + scmClient = mock(ScmClient.class); + + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @Test + public void testAckMissingContainer() throws Exception { + ContainerInfo container = mockContainer(1, false); + when(scmClient.getContainer(1L)).thenReturn(container); + + AckMissingSubcommand cmd = new AckMissingSubcommand(); + new CommandLine(cmd).parseArgs("1"); + cmd.execute(scmClient); + verify(scmClient, times(1)).acknowledgeMissingContainer(1L); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("Acknowledged container: 1"); + } + + @Test + public void testListAcknowledgedContainers() throws Exception { + ContainerInfo container1 = mockContainer(1, true); + ContainerInfo container2 = mockContainer(2, false); + + List allContainers = Arrays.asList(container1, container2); + ContainerListResult result = new ContainerListResult(allContainers, 2); + when(scmClient.listContainer(anyLong(), anyInt())).thenReturn(result); + + AckMissingSubcommand cmd = new AckMissingSubcommand(); + new CommandLine(cmd).parseArgs("--list"); + cmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("1"); + assertThat(output).doesNotContain("2"); + } + + @Test + public void testUnacknowledgeMissingContainer() throws Exception { + ContainerInfo container = mockContainer(1, true); + when(scmClient.getContainer(1L)).thenReturn(container); + + UnackMissingSubcommand cmd = new UnackMissingSubcommand(); + new 
CommandLine(cmd).parseArgs("1"); + cmd.execute(scmClient); + verify(scmClient, times(1)).unacknowledgeMissingContainer(1L); + + String output = outContent.toString(DEFAULT_ENCODING); + assertThat(output).contains("Unacknowledged container: 1"); + } + + private ContainerInfo mockContainer(long containerID, boolean ackMissing) { + return new ContainerInfo.Builder() + .setContainerID(containerID) + .setState(OPEN) + .setHealthState(ContainerHealthState.MISSING) + .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) + .setNumberOfKeys(1) + .setAckMissing(ackMissing) + .build(); + } +} From 2efa8aafde566c7a2c483ea538131cf787c9a812 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Wed, 11 Feb 2026 10:50:51 +0530 Subject: [PATCH 2/9] Add admin check and rename getter method --- .../hadoop/hdds/scm/container/ContainerInfo.java | 10 +++++----- .../hdds/scm/container/ContainerStateManagerImpl.java | 4 ++-- .../health/AcknowledgedMissingContainerHandler.java | 4 ++-- .../replication/health/ClosingContainerHandler.java | 2 +- .../hdds/scm/server/SCMClientProtocolServer.java | 2 ++ .../hdds/scm/cli/container/AckMissingSubcommand.java | 2 +- .../hdds/scm/cli/container/UnackMissingSubcommand.java | 2 +- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index b53ae71b7ddb..183991a04097 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -271,17 +271,17 @@ public void setHealthState(ContainerHealthState newHealthState) { * * @return boolean */ - public boolean getAckMissing() { + public boolean isAckMissing() { return ackMissing; } /** * Set the boolean for ackMissing. 
* - * @param isAckMissing checks if container is acked as missing or not + * @param acked checks if container is acked as missing or not */ - public void setAckMissing(boolean isAckMissing) { - this.ackMissing = isAckMissing; + public void setAckMissing(boolean acked) { + this.ackMissing = acked; } @JsonIgnore @@ -297,7 +297,7 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setOwner(getOwner()) .setSequenceId(getSequenceId()) .setReplicationType(getReplicationType()) - .setAckMissing(getAckMissing()); + .setAckMissing(isAckMissing()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 3c5de01ddd38..b794e8a6c919 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -562,9 +562,9 @@ public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) if (currentInfo == null) { throw new ContainerNotFoundException(containerID); } - currentInfo.setAckMissing(updatedInfo.getAckMissing()); + currentInfo.setAckMissing(updatedInfo.isAckMissing()); transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); - LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.getAckMissing()); + LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.isAckMissing()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java index 3fe36e4b1ad4..20f217cf98bb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java @@ -37,9 +37,9 @@ public boolean handle(ContainerCheckRequest request) { ContainerInfo containerInfo = request.getContainerInfo(); ContainerID containerID = containerInfo.containerID(); LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", - containerID, containerInfo.getAckMissing()); + containerID, containerInfo.isAckMissing()); - if (!containerInfo.getAckMissing()) { + if (!containerInfo.isAckMissing()) { LOG.debug("Container {} is not acknowledged ", containerID); return false; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index fc3cf27895f4..1d5bc206ef79 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -69,7 +69,7 @@ public boolean handle(ContainerCheckRequest request) { .getReplicationType() != ReplicationType.RATIS; // Don't report MISSING if container is acknowledged or empty (will be handled by other handlers) - if (request.getContainerReplicas().isEmpty() && !containerInfo.getAckMissing() && + if (request.getContainerReplicas().isEmpty() && !containerInfo.isAckMissing() && containerInfo.getNumberOfKeys() > 0) { 
request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 28e9a6a1ef65..9b9f3582c30f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -1688,6 +1688,7 @@ public void acknowledgeMissingContainer(long longContainerID) throws IOException auditMap.put("containerID", containerID.toString()); try { + getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); Set replicas = scm.getContainerManager().getContainerReplicas(containerID); if (replicas != null && !replicas.isEmpty()) { @@ -1718,6 +1719,7 @@ public void unacknowledgeMissingContainer(long longContainerID) throws IOExcepti auditMap.put("containerID", containerID.toString()); try { + getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java index e9c8a6ce4910..88bac71ee95b 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -52,7 +52,7 @@ public void execute(ScmClient scmClient) throws IOException { // List acknowledged containers ContainerListResult result = 
scmClient.listContainer(1, Integer.MAX_VALUE); for (ContainerInfo info : result.getContainerInfoList()) { - if (info.getAckMissing()) { + if (info.isAckMissing()) { out().println(info.getContainerID()); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java index c87509122472..5bb940912095 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -51,7 +51,7 @@ public void execute(ScmClient scmClient) throws IOException { for (Long id : ids) { try { ContainerInfo containerInfo = scmClient.getContainer(id); - if (!containerInfo.getAckMissing()) { + if (!containerInfo.isAckMissing()) { err().println("Cannot unacknowledge container " + id + ": " + "Only acknowledged missing containers can be unacknowledged."); continue; From 0088a45f9f2003b6145020645f02e4f42e2f0368 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 11:03:49 +0530 Subject: [PATCH 3/9] Single API to set/unset of healthState ACK_MISSING --- .../scm/container/ContainerHealthState.java | 7 +++ .../hdds/scm/container/ContainerInfo.java | 32 +--------- .../hadoop/hdds/scm/client/ScmClient.java | 15 ++--- .../StorageContainerLocationProtocol.java | 15 ++--- ...ocationProtocolClientSideTranslatorPB.java | 21 ++----- .../src/main/proto/ScmAdminProtocol.proto | 23 +++---- .../src/main/proto/hdds.proto | 1 - .../container/ContainerStateManagerImpl.java | 15 ++++- .../replication/ReplicationManager.java | 11 +++- .../AcknowledgedMissingContainerHandler.java | 57 ----------------- .../health/ClosingContainerHandler.java | 5 +- ...ocationProtocolServerSideTranslatorPB.java | 33 +++------- .../scm/server/SCMClientProtocolServer.java | 63 +++++++------------ 
.../scm/cli/ContainerOperationClient.java | 9 +-- .../cli/container/AckMissingSubcommand.java | 5 +- .../cli/container/UnackMissingSubcommand.java | 5 +- .../TestAckMissingContainerSubcommand.java | 9 +-- 17 files changed, 100 insertions(+), 226 deletions(-) delete mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java index 56dd1c5620b6..ac9a82d190d1 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java @@ -107,6 +107,13 @@ public enum ContainerHealthState { "Containers in OPEN state without any healthy Pipeline", "OpenContainersWithoutPipeline"), + /** + * Acknowledge missing containers which are not problematic. 
+ */ + ACK_MISSING((short) 10, + "Acknowledge missing containers which are not problematic", + "AcknowledgeMissingContainers"), + // ========== Actual Combinations Found in Code (100+) ========== /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 183991a04097..2beef2abf885 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -87,7 +87,6 @@ public final class ContainerInfo implements Comparable { private long sequenceId; // Health state of the container (determined by ReplicationManager) private ContainerHealthState healthState; - private boolean ackMissing; private ContainerInfo(Builder b) { containerID = ContainerID.valueOf(b.containerID); @@ -103,7 +102,6 @@ private ContainerInfo(Builder b) { replicationConfig = b.replicationConfig; clock = b.clock; healthState = b.healthState != null ? b.healthState : ContainerHealthState.HEALTHY; - ackMissing = b.ackMissing; } public static Codec getCodec() { @@ -123,8 +121,7 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setContainerID(info.getContainerID()) .setDeleteTransactionId(info.getDeleteTransactionId()) .setReplicationConfig(config) - .setSequenceId(info.getSequenceId()) - .setAckMissing(info.getAckMissing()); + .setSequenceId(info.getSequenceId()); if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); @@ -266,24 +263,6 @@ public void setHealthState(ContainerHealthState newHealthState) { this.healthState = newHealthState; } - /** - * Check if container is acked as missing. - * - * @return boolean - */ - public boolean isAckMissing() { - return ackMissing; - } - - /** - * Set the boolean for ackMissing. 
- * - * @param acked checks if container is acked as missing or not - */ - public void setAckMissing(boolean acked) { - this.ackMissing = acked; - } - @JsonIgnore public HddsProtos.ContainerInfoProto getProtobuf() { HddsProtos.ContainerInfoProto.Builder builder = @@ -296,8 +275,7 @@ public HddsProtos.ContainerInfoProto getProtobuf() { .setDeleteTransactionId(getDeleteTransactionId()) .setOwner(getOwner()) .setSequenceId(getSequenceId()) - .setReplicationType(getReplicationType()) - .setAckMissing(isAckMissing()); + .setReplicationType(getReplicationType()); if (replicationConfig instanceof ECReplicationConfig) { builder.setEcReplicationConfig(((ECReplicationConfig) replicationConfig) @@ -415,7 +393,6 @@ public static class Builder { private PipelineID pipelineID; private ReplicationConfig replicationConfig; private ContainerHealthState healthState; - private boolean ackMissing; public Builder setPipelineID(PipelineID pipelineId) { this.pipelineID = pipelineId; @@ -473,11 +450,6 @@ public Builder setHealthState(ContainerHealthState healthState) { return this; } - public Builder setAckMissing(boolean ackMissing) { - this.ackMissing = ackMissing; - return this; - } - /** * Also resets {@code stateEnterTime}, so make sure to set clock first. */ diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 716bfadebf2f..ea2b5b2848c9 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -464,18 +464,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Acknowledge the missing container. + * Set or unset the ACK_MISSING state for a container. * - * @param containerId The ID of the container to acknowledge as missing. 
+ * @param containerId The ID of the container. + * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. * @throws IOException */ - void acknowledgeMissingContainer(long containerId) throws IOException; - - /** - * Unacknowledge the missing container. - * - * @param containerId The ID of the container to unacknowledge as missing. - * @throws IOException - */ - void unacknowledgeMissingContainer(long containerId) throws IOException; + void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index d2ede073a4ee..83a9e70c572a 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -513,18 +513,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Acknowledge the missing container. + * Set or unset the ACK_MISSING state for a container. * - * @param containerId The ID of the container to acknowledge as missing. + * @param containerId The ID of the container. + * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. * @throws IOException */ - void acknowledgeMissingContainer(long containerId) throws IOException; - - /** - * Unacknowledge the missing container. - * - * @param containerId The ID of the container to unacknowledge as missing. 
- * @throws IOException - */ - void unacknowledgeMissingContainer(long containerId) throws IOException; + void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index a5410bfaff22..98795e746fe5 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoRequestProto; @@ -116,6 +115,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest.Builder; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; +import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -126,7 +126,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -1244,20 +1243,12 @@ public void reconcileContainer(long containerID) throws IOException { } @Override - public void acknowledgeMissingContainer(long containerID) throws IOException { - AcknowledgeMissingContainerRequestProto request = AcknowledgeMissingContainerRequestProto.newBuilder() - .setContainerID(containerID) - .build(); - submitRequest(Type.AcknowledgeMissingContainer, - builder -> builder.setAcknowledgeMissingContainerRequest(request)); - } - - @Override - public void unacknowledgeMissingContainer(long containerID) throws IOException { - UnacknowledgeMissingContainerRequestProto request = UnacknowledgeMissingContainerRequestProto.newBuilder() + public void setAckMissingContainer(long containerID, boolean acknowledge) + throws IOException { + SetAckMissingContainerRequestProto request = SetAckMissingContainerRequestProto.newBuilder() .setContainerID(containerID) + 
.setAcknowledge(acknowledge) .build(); - submitRequest(Type.UnacknowledgeMissingContainer, - builder -> builder.setUnacknowledgeMissingContainerRequest(request)); + submitRequest(Type.SetAckMissingContainer, builder -> builder.setSetAckMissingContainerRequest(request)); } } diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 8bcfa627df74..6cb9be66df22 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -87,8 +87,7 @@ message ScmContainerLocationRequest { optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; - optional AcknowledgeMissingContainerRequestProto acknowledgeMissingContainerRequest = 51; - optional UnacknowledgeMissingContainerRequestProto unacknowledgeMissingContainerRequest = 52; + optional SetAckMissingContainerRequestProto setAckMissingContainerRequest = 51; } message ScmContainerLocationResponse { @@ -147,8 +146,7 @@ message ScmContainerLocationResponse { optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; optional GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; - optional AcknowledgeMissingContainerResponseProto acknowledgeMissingContainerResponse = 51; - optional UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainerResponse = 52; + optional SetAckMissingContainerResponseProto setAckMissingContainerResponse = 51; enum Status { OK = 1; @@ -206,8 +204,7 @@ enum Type { GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; GetDeletedBlocksTransactionSummary = 46; - 
AcknowledgeMissingContainer = 47; - UnacknowledgeMissingContainer = 48; + SetAckMissingContainer = 47; } /** @@ -701,18 +698,12 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } -message AcknowledgeMissingContainerRequestProto { - required int64 containerID = 1; -} - -message AcknowledgeMissingContainerResponseProto { -} - -message UnacknowledgeMissingContainerRequestProto { - required int64 containerID = 1; +message SetAckMissingContainerRequestProto { + optional int64 containerID = 1; + optional bool acknowledge = 2; } -message UnacknowledgeMissingContainerResponseProto { +message SetAckMissingContainerResponseProto { } /** diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index b95569e3bac2..eb819b80a3e8 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,7 +271,6 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; - optional bool ackMissing = 13; } message ContainerWithPipeline { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index b794e8a6c919..e7c216b8f422 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -562,9 +562,20 @@ public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) if (currentInfo == null) { throw new ContainerNotFoundException(containerID); } - currentInfo.setAckMissing(updatedInfo.isAckMissing()); + + // Only persist ACK_MISSING health state 
changes + // Other health states are dynamic and computed by ReplicationManager + ContainerHealthState newHealthState = updatedInfo.getHealthState(); + if (newHealthState == ContainerHealthState.ACK_MISSING) { + currentInfo.setHealthState(ContainerHealthState.ACK_MISSING); + LOG.debug("Persisting ACK_MISSING state for container: {}", containerID); + } else if (currentInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { + currentInfo.setHealthState(null); + LOG.debug("Clearing ACK_MISSING state for container: {}, new state: {}", + containerID, newHealthState); + } transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); - LOG.debug("Updated container info for container: {}, ackMissing={}", containerID, currentInfo.isAckMissing()); + LOG.debug("Updated container info for container: {}, healthState={}", containerID, currentInfo.getHealthState()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 35c84c299bc4..d1d317108c89 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -55,13 +55,13 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicationCommandPriority; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.PlacementPolicy; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; 
import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; -import org.apache.hadoop.hdds.scm.container.replication.health.AcknowledgedMissingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler; @@ -270,7 +270,6 @@ public ReplicationManager(final ReplicationManagerConfiguration rmConf, .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) - .addNext(new AcknowledgedMissingContainerHandler()) .addNext(new QuasiClosedStuckReplicationCheck()) .addNext(ecReplicationCheckHandler) .addNext(ratisReplicationCheckHandler) @@ -856,6 +855,14 @@ protected boolean processContainer(ContainerInfo containerInfo, ReplicationQueue repQueue, ReplicationManagerReport report, boolean readOnly) throws ContainerNotFoundException { synchronized (containerInfo) { + // Skip containers that are acknowledged as missing + // These containers are persisted with ACK_MISSING state and should not be + // processed by ReplicationManager until unacknowledged + if (containerInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { + LOG.debug("Skipping ACK_MISSING container: {}", containerInfo.getContainerID()); + return false; + } + // Reset health state to HEALTHY before processing this container report.resetContainerHealthState(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java deleted file mode 100644 index 20f217cf98bb..000000000000 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/AcknowledgedMissingContainerHandler.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hdds.scm.container.replication.health; - -import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Class used in Replication Manager to skip containers that have been - * acknowledged as missing. These containers will still be marked as - * MISSING in the health state but will not trigger replication. 
- */ -public class AcknowledgedMissingContainerHandler extends AbstractCheck { - - private static final Logger LOG = LoggerFactory.getLogger(AcknowledgedMissingContainerHandler.class); - - @Override - public boolean handle(ContainerCheckRequest request) { - ContainerInfo containerInfo = request.getContainerInfo(); - ContainerID containerID = containerInfo.containerID(); - LOG.debug("Checking container {}, ackMissing={} in AcknowledgedMissingContainerHandler", - containerID, containerInfo.isAckMissing()); - - if (!containerInfo.isAckMissing()) { - LOG.debug("Container {} is not acknowledged ", containerID); - return false; - } - LOG.debug("Container {} has been acknowledged as missing.", containerID); - - if (request.getContainerReplicas().isEmpty()) { - LOG.debug("Acknowledged missing container {} confirmed to have no replicas.", containerID); - } else { - LOG.warn("Container {} was acknowledged as missing but now has {} replicas. " + - "The container may have been recovered. Consider un-acknowledging it.", - containerID, request.getContainerReplicas().size()); - } - return true; - } -} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index 1d5bc206ef79..2f1e5bdf897d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -68,9 +68,8 @@ public boolean handle(ContainerCheckRequest request) { boolean forceClose = containerInfo.getReplicationConfig() .getReplicationType() != ReplicationType.RATIS; - // Don't report MISSING if container is acknowledged or empty (will be handled by other handlers) - if (request.getContainerReplicas().isEmpty() && 
!containerInfo.isAckMissing() && - containerInfo.getNumberOfKeys() > 0) { + // Report MISSING only for containers with no replicas and keys > 0 + if (request.getContainerReplicas().isEmpty() && containerInfo.getNumberOfKeys() > 0) { request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index d069bb49c432..e9eca075a183 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -47,8 +47,6 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.UpgradeFinalizationStatus; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.AcknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ActivatePipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto; @@ -124,6 +122,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse.Status; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -136,8 +136,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.UnacknowledgeMissingContainerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -752,19 +750,12 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); - case AcknowledgeMissingContainer: + case SetAckMissingContainer: return ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) .setStatus(Status.OK) - 
.setAcknowledgeMissingContainerResponse( - acknowledgeMissingContainer(request.getAcknowledgeMissingContainerRequest())) - .build(); - case UnacknowledgeMissingContainer: - return ScmContainerLocationResponse.newBuilder() - .setCmdType(request.getCmdType()) - .setStatus(Status.OK) - .setUnacknowledgeMissingContainerResponse( - unacknowledgeMissingContainer(request.getUnacknowledgeMissingContainerRequest())) + .setSetAckMissingContainerResponse( + setAckMissingContainer(request.getSetAckMissingContainerRequest())) .build(); default: throw new IllegalArgumentException( @@ -1405,15 +1396,9 @@ public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequ return ReconcileContainerResponseProto.getDefaultInstance(); } - public AcknowledgeMissingContainerResponseProto acknowledgeMissingContainer( - AcknowledgeMissingContainerRequestProto request) throws IOException { - impl.acknowledgeMissingContainer(request.getContainerID()); - return AcknowledgeMissingContainerResponseProto.getDefaultInstance(); - } - - public UnacknowledgeMissingContainerResponseProto unacknowledgeMissingContainer( - UnacknowledgeMissingContainerRequestProto request) throws IOException { - impl.unacknowledgeMissingContainer(request.getContainerID()); - return UnacknowledgeMissingContainerResponseProto.getDefaultInstance(); + public SetAckMissingContainerResponseProto setAckMissingContainer( + SetAckMissingContainerRequestProto request) throws IOException { + impl.setAckMissingContainer(request.getContainerID(), request.getAcknowledge()); + return SetAckMissingContainerResponseProto.getDefaultInstance(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 9b9f3582c30f..bc25344d9786 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.FetchMetrics; import org.apache.hadoop.hdds.scm.ScmInfo; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerListResult; @@ -1682,54 +1683,38 @@ public void reconcileContainer(long longContainerID) throws IOException { } @Override - public void acknowledgeMissingContainer(long longContainerID) throws IOException { + public void setAckMissingContainer(long longContainerID, boolean acknowledge) throws IOException { ContainerID containerID = ContainerID.valueOf(longContainerID); final Map auditMap = new HashMap<>(); auditMap.put("containerID", containerID.toString()); + auditMap.put("acknowledge", String.valueOf(acknowledge)); try { getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); - Set replicas = scm.getContainerManager().getContainerReplicas(containerID); - if (replicas != null && !replicas.isEmpty()) { - throw new IOException("Container " + longContainerID + - " has " + replicas.size() + " replicas and cannot be acknowledged as missing"); - } - - if (containerInfo.getNumberOfKeys() == 0) { - throw new IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + + if (acknowledge) { + // Validation for setting ACK_MISSING + Set replicas = scm.getContainerManager().getContainerReplicas(containerID); + if (replicas != null && !replicas.isEmpty()) { + throw new IOException("Container " + longContainerID + " has " + replicas.size() + + " replicas and cannot be acknowledged as missing"); + } + if (containerInfo.getNumberOfKeys() == 0) { + throw new 
IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); + } + // Set to ACK_MISSING + containerInfo.setHealthState(ContainerHealthState.ACK_MISSING); + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + } else { + containerInfo.setHealthState(null); + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); } - - HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() - .setAckMissing(true) - .build(); - scm.getContainerManager().updateContainerInfo(containerID, updatedProto); - - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); - } catch (IOException ex) { - AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); - throw ex; - } - } - - @Override - public void unacknowledgeMissingContainer(long longContainerID) throws IOException { - ContainerID containerID = ContainerID.valueOf(longContainerID); - final Map auditMap = new HashMap<>(); - auditMap.put("containerID", containerID.toString()); - - try { - getScm().checkAdminAccess(getRemoteUser(), false); - ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); - - HddsProtos.ContainerInfoProto updatedProto = containerInfo.getProtobuf().toBuilder() - .setAckMissing(false) - .build(); - scm.getContainerManager().updateContainerInfo(containerID, updatedProto); - - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); + scm.getContainerManager().updateContainerInfo(containerID, containerInfo.getProtobuf()); } catch (IOException ex) { - AUDIT.logWriteFailure(buildAuditMessageForFailure(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap, ex)); + SCMAction action = acknowledge ? 
+ SCMAction.ACKNOWLEDGE_MISSING_CONTAINER : SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER; + AUDIT.logWriteFailure(buildAuditMessageForFailure(action, auditMap, ex)); throw ex; } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index b3e8f24c3982..0774fb62df31 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -600,12 +600,7 @@ public void reconcileContainer(long id) throws IOException { } @Override - public void acknowledgeMissingContainer(long containerId) throws IOException { - storageContainerLocationClient.acknowledgeMissingContainer(containerId); - } - - @Override - public void unacknowledgeMissingContainer(long containerId) throws IOException { - storageContainerLocationClient.unacknowledgeMissingContainer(containerId); + public void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException { + storageContainerLocationClient.setAckMissingContainer(containerId, acknowledge); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java index 88bac71ee95b..49cb86b9c9e3 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import 
org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerListResult; import picocli.CommandLine; @@ -52,7 +53,7 @@ public void execute(ScmClient scmClient) throws IOException { // List acknowledged containers ContainerListResult result = scmClient.listContainer(1, Integer.MAX_VALUE); for (ContainerInfo info : result.getContainerInfoList()) { - if (info.isAckMissing()) { + if (info.getHealthState() == ContainerHealthState.ACK_MISSING) { out().println(info.getContainerID()); } } @@ -75,7 +76,7 @@ public void execute(ScmClient scmClient) throws IOException { continue; } - scmClient.acknowledgeMissingContainer(id); + scmClient.setAckMissingContainer(id, true); out().println("Acknowledged container: " + id); } catch (IOException e) { err().println("Failed to acknowledge container " + id + ": " + e.getMessage()); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java index 5bb940912095..bafaedcdb184 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import picocli.CommandLine; @@ -51,12 +52,12 @@ public void execute(ScmClient scmClient) throws IOException { for (Long id : ids) { try { ContainerInfo containerInfo = scmClient.getContainer(id); - if (!containerInfo.isAckMissing()) { + if (containerInfo.getHealthState() != ContainerHealthState.ACK_MISSING) { err().println("Cannot 
unacknowledge container " + id + ": " + "Only acknowledged missing containers can be unacknowledged."); continue; } - scmClient.unacknowledgeMissingContainer(id); + scmClient.setAckMissingContainer(id, false); out().println("Unacknowledged container: " + id); } catch (IOException e) { err().println("Failed to unacknowledge container " + id + ": " + e.getMessage()); diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java index b625cda61f9c..dc75850a58a4 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java @@ -68,7 +68,7 @@ public void testAckMissingContainer() throws Exception { AckMissingSubcommand cmd = new AckMissingSubcommand(); new CommandLine(cmd).parseArgs("1"); cmd.execute(scmClient); - verify(scmClient, times(1)).acknowledgeMissingContainer(1L); + verify(scmClient, times(1)).setAckMissingContainer(1L, true); String output = outContent.toString(DEFAULT_ENCODING); assertThat(output).contains("Acknowledged container: 1"); @@ -100,20 +100,21 @@ public void testUnacknowledgeMissingContainer() throws Exception { UnackMissingSubcommand cmd = new UnackMissingSubcommand(); new CommandLine(cmd).parseArgs("1"); cmd.execute(scmClient); - verify(scmClient, times(1)).unacknowledgeMissingContainer(1L); + verify(scmClient, times(1)).setAckMissingContainer(1L, false); String output = outContent.toString(DEFAULT_ENCODING); assertThat(output).contains("Unacknowledged container: 1"); } private ContainerInfo mockContainer(long containerID, boolean ackMissing) { + ContainerHealthState healthState = ackMissing ? 
+ ContainerHealthState.ACK_MISSING : ContainerHealthState.MISSING; return new ContainerInfo.Builder() .setContainerID(containerID) .setState(OPEN) - .setHealthState(ContainerHealthState.MISSING) + .setHealthState(healthState) .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) .setNumberOfKeys(1) - .setAckMissing(ackMissing) .build(); } } From 563a345331de600d49d3d8f3425b2630d5fc8550 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 12:27:32 +0530 Subject: [PATCH 4/9] Persist the healthState --- .../apache/hadoop/hdds/scm/container/ContainerInfo.java | 9 +++++++++ hadoop-hdds/interface-client/src/main/proto/hdds.proto | 1 + 2 files changed, 10 insertions(+) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 2beef2abf885..40d82bfa6a48 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -123,6 +123,10 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setReplicationConfig(config) .setSequenceId(info.getSequenceId()); + if (info.hasAckMissing() && info.getAckMissing()) { + builder.setHealthState(ContainerHealthState.ACK_MISSING); + } + if (info.hasPipelineID()) { builder.setPipelineID(PipelineID.getFromProtobuf(info.getPipelineID())); } @@ -291,6 +295,11 @@ public HddsProtos.ContainerInfoProto getProtobuf() { builder.setPipelineID(getPipelineID().getProtobuf()); } + // Only persist ACK_MISSING health state, others are dynamic + if (healthState == ContainerHealthState.ACK_MISSING) { + builder.setAckMissing(true); + } + return builder.build(); } diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index eb819b80a3e8..b95569e3bac2 100644 --- 
a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,6 +271,7 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; + optional bool ackMissing = 13; } message ContainerWithPipeline { From 9c210c8fd75bdeaadd47370f8aa850990714723b Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 19 Feb 2026 12:46:54 +0530 Subject: [PATCH 5/9] Fix TestContainerHealthState --- .../scm/container/TestContainerHealthState.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java index 6ffc678ea175..763a2e5a9a21 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.container; +import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.ACK_MISSING; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.EMPTY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.HEALTHY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.MISSING; @@ -37,6 +38,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; import org.junit.jupiter.api.Test; /** @@ -59,6 +63,7 @@ public void testIndividualStateValues() { assertEquals(7, OPEN_UNHEALTHY.getValue()); assertEquals(8, QUASI_CLOSED_STUCK.getValue()); assertEquals(9, OPEN_WITHOUT_PIPELINE.getValue()); 
+ assertEquals(10, ACK_MISSING.getValue()); } @Test @@ -69,7 +74,7 @@ public void testCombinationStateValues() { assertEquals(102, MISSING_UNDER_REPLICATED.getValue()); assertEquals(103, QUASI_CLOSED_STUCK_UNDER_REPLICATED.getValue()); assertEquals(104, QUASI_CLOSED_STUCK_OVER_REPLICATED.getValue()); - assertEquals(105, ContainerHealthState.QUASI_CLOSED_STUCK_MISSING.getValue()); + assertEquals(105, QUASI_CLOSED_STUCK_MISSING.getValue()); } @Test @@ -101,6 +106,7 @@ public void testFromValueIndividualStates() { assertEquals(OPEN_UNHEALTHY, ContainerHealthState.fromValue((short) 7)); assertEquals(QUASI_CLOSED_STUCK, ContainerHealthState.fromValue((short) 8)); assertEquals(OPEN_WITHOUT_PIPELINE, ContainerHealthState.fromValue((short) 9)); + assertEquals(ACK_MISSING, ContainerHealthState.fromValue((short) 10)); } @Test @@ -126,7 +132,7 @@ public void testFromValueUnknownReturnsHealthy() { @Test public void testAllEnumValuesAreUnique() { // Verify all enum constants have unique values - java.util.Set values = new java.util.HashSet<>(); + Set values = new HashSet<>(); for (ContainerHealthState state : ContainerHealthState.values()) { assertFalse(values.contains(state.getValue()), "Duplicate value found: " + state.getValue()); @@ -137,16 +143,16 @@ public void testAllEnumValuesAreUnique() { @Test public void testIndividualStateCount() { // Should have 10 individual states (0-9) - long individualCount = java.util.Arrays.stream(ContainerHealthState.values()) + long individualCount = Arrays.stream(ContainerHealthState.values()) .filter(s -> s.getValue() >= 0 && s.getValue() <= 99) .count(); - assertEquals(10, individualCount, "Expected 10 individual states"); + assertEquals(11, individualCount, "Expected 11 individual states"); } @Test public void testCombinationStateCount() { // Should have 6 combination states (100-105) - long combinationCount = java.util.Arrays.stream(ContainerHealthState.values()) + long combinationCount = Arrays.stream(ContainerHealthState.values()) 
.filter(s -> s.getValue() >= 100) .count(); assertEquals(6, combinationCount, "Expected 6 combination states"); From 875854b45ebbd610b58a4be8f1abac733cf68102 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 26 Mar 2026 12:01:13 +0530 Subject: [PATCH 6/9] Add suppress options in report and list --- .../scm/container/ContainerHealthState.java | 7 - .../hdds/scm/container/ContainerInfo.java | 38 ++- .../container/TestContainerHealthState.java | 16 +- .../hadoop/hdds/scm/client/ScmClient.java | 27 +- .../StorageContainerLocationProtocol.java | 30 +- ...ocationProtocolClientSideTranslatorPB.java | 23 +- .../src/main/proto/ScmAdminProtocol.proto | 13 +- .../src/main/proto/hdds.proto | 2 +- .../container/ContainerStateManagerImpl.java | 16 +- .../replication/ReplicationManager.java | 10 +- ...ocationProtocolServerSideTranslatorPB.java | 21 +- .../scm/server/SCMClientProtocolServer.java | 67 ++-- .../apache/hadoop/ozone/audit/SCMAction.java | 4 +- .../scm/cli/ContainerOperationClient.java | 20 +- .../cli/container/AckMissingSubcommand.java | 98 ------ .../scm/cli/container/ContainerCommands.java | 2 - .../cli/container/ContainerIDParameters.java | 11 +- .../scm/cli/container/ListSubcommand.java | 9 +- .../scm/cli/container/ReportSubcommand.java | 60 ++++ .../cli/container/UnackMissingSubcommand.java | 75 ----- .../TestAckMissingContainerSubcommand.java | 120 ------- .../TestContainerReportSuppressOptions.java | 298 ++++++++++++++++++ 22 files changed, 565 insertions(+), 402 deletions(-) delete mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java delete mode 100644 hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java delete mode 100644 hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java create mode 100644 
hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java index ac9a82d190d1..56dd1c5620b6 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerHealthState.java @@ -107,13 +107,6 @@ public enum ContainerHealthState { "Containers in OPEN state without any healthy Pipeline", "OpenContainersWithoutPipeline"), - /** - * Acknowledge missing containers which are not problematic. - */ - ACK_MISSING((short) 10, - "Acknowledge missing containers which are not problematic", - "AcknowledgeMissingContainers"), - // ========== Actual Combinations Found in Code (100+) ========== /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java index 859a9482c1b8..acefb5f0b38e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerInfo.java @@ -20,6 +20,7 @@ import static java.lang.Math.max; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import java.time.Clock; import java.time.Instant; @@ -87,6 +88,7 @@ public final class ContainerInfo implements Comparable { private long sequenceId; // Health state of the container (determined by ReplicationManager) private ContainerHealthState healthState; + private boolean suppressed; private ContainerInfo(Builder b) { containerID = ContainerID.valueOf(b.containerID); @@ 
-102,6 +104,7 @@ private ContainerInfo(Builder b) { replicationConfig = b.replicationConfig; clock = b.clock; healthState = b.healthState != null ? b.healthState : ContainerHealthState.HEALTHY; + suppressed = b.suppressed; } public static Codec getCodec() { @@ -123,8 +126,8 @@ public static ContainerInfo fromProtobuf(HddsProtos.ContainerInfoProto info) { .setReplicationConfig(config) .setSequenceId(info.getSequenceId()); - if (info.hasAckMissing() && info.getAckMissing()) { - builder.setHealthState(ContainerHealthState.ACK_MISSING); + if (info.hasSuppressed()) { + builder.setSuppressed(info.getSuppressed()); } if (info.hasPipelineID()) { @@ -267,6 +270,26 @@ public void setHealthState(ContainerHealthState newHealthState) { this.healthState = newHealthState; } + /** + * Check if container is suppressed. + * Only included in JSON output when true. + * + * @return boolean + */ + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public boolean isSuppressed() { + return suppressed; + } + + /** + * Set the boolean for suppressed. 
+ * + * @param suppressed checks if container is suppressed or not + */ + public void setSuppressed(boolean suppressed) { + this.suppressed = suppressed; + } + @JsonIgnore public HddsProtos.ContainerInfoProto getProtobuf() { HddsProtos.ContainerInfoProto.Builder builder = @@ -292,9 +315,8 @@ public HddsProtos.ContainerInfoProto getProtobuf() { builder.setPipelineID(getPipelineID().getProtobuf()); } - // Only persist ACK_MISSING health state, others are dynamic - if (healthState == ContainerHealthState.ACK_MISSING) { - builder.setAckMissing(true); + if (suppressed) { + builder.setSuppressed(true); } return builder.build(); @@ -399,6 +421,7 @@ public static class Builder { private PipelineID pipelineID; private ReplicationConfig replicationConfig; private ContainerHealthState healthState; + private boolean suppressed; public Builder setPipelineID(PipelineID pipelineId) { this.pipelineID = pipelineId; @@ -456,6 +479,11 @@ public Builder setHealthState(ContainerHealthState healthState) { return this; } + public Builder setSuppressed(boolean suppressed) { + this.suppressed = suppressed; + return this; + } + /** * Also resets {@code stateEnterTime}, so make sure to set clock first. 
*/ diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java index 763a2e5a9a21..6ffc678ea175 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerHealthState.java @@ -17,7 +17,6 @@ package org.apache.hadoop.hdds.scm.container; -import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.ACK_MISSING; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.EMPTY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.HEALTHY; import static org.apache.hadoop.hdds.scm.container.ContainerHealthState.MISSING; @@ -38,9 +37,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; import org.junit.jupiter.api.Test; /** @@ -63,7 +59,6 @@ public void testIndividualStateValues() { assertEquals(7, OPEN_UNHEALTHY.getValue()); assertEquals(8, QUASI_CLOSED_STUCK.getValue()); assertEquals(9, OPEN_WITHOUT_PIPELINE.getValue()); - assertEquals(10, ACK_MISSING.getValue()); } @Test @@ -74,7 +69,7 @@ public void testCombinationStateValues() { assertEquals(102, MISSING_UNDER_REPLICATED.getValue()); assertEquals(103, QUASI_CLOSED_STUCK_UNDER_REPLICATED.getValue()); assertEquals(104, QUASI_CLOSED_STUCK_OVER_REPLICATED.getValue()); - assertEquals(105, QUASI_CLOSED_STUCK_MISSING.getValue()); + assertEquals(105, ContainerHealthState.QUASI_CLOSED_STUCK_MISSING.getValue()); } @Test @@ -106,7 +101,6 @@ public void testFromValueIndividualStates() { assertEquals(OPEN_UNHEALTHY, ContainerHealthState.fromValue((short) 7)); assertEquals(QUASI_CLOSED_STUCK, ContainerHealthState.fromValue((short) 8)); 
assertEquals(OPEN_WITHOUT_PIPELINE, ContainerHealthState.fromValue((short) 9)); - assertEquals(ACK_MISSING, ContainerHealthState.fromValue((short) 10)); } @Test @@ -132,7 +126,7 @@ public void testFromValueUnknownReturnsHealthy() { @Test public void testAllEnumValuesAreUnique() { // Verify all enum constants have unique values - Set values = new HashSet<>(); + java.util.Set values = new java.util.HashSet<>(); for (ContainerHealthState state : ContainerHealthState.values()) { assertFalse(values.contains(state.getValue()), "Duplicate value found: " + state.getValue()); @@ -143,16 +137,16 @@ public void testAllEnumValuesAreUnique() { @Test public void testIndividualStateCount() { // Should have 10 individual states (0-9) - long individualCount = Arrays.stream(ContainerHealthState.values()) + long individualCount = java.util.Arrays.stream(ContainerHealthState.values()) .filter(s -> s.getValue() >= 0 && s.getValue() <= 99) .count(); - assertEquals(11, individualCount, "Expected 10 individual states"); + assertEquals(10, individualCount, "Expected 10 individual states"); } @Test public void testCombinationStateCount() { // Should have 6 combination states (100-105) - long combinationCount = Arrays.stream(ContainerHealthState.values()) + long combinationCount = java.util.Arrays.stream(ContainerHealthState.values()) .filter(s -> s.getValue() >= 100) .count(); assertEquals(6, combinationCount, "Expected 6 combination states"); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 58303fc0bcad..e5456226c8cd 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -145,6 +145,25 @@ ContainerListResult listContainer(long startContainerID, int count, ReplicationConfig replicationConfig) throws IOException; + 
/** + * Lists a range of containers and get their info. + * + * @param startContainerID start containerID. + * @param count count must be {@literal >} 0. + * @param state Container of this state will be returned. + * @param replicationConfig container replication Config. + * @param suppressed container to be suppressed/unsuppressed from report + * @return a list of containers capped by max count allowed + * in "ozone.scm.container.list.max.count" and total number of containers. + * @throws IOException + */ + ContainerListResult listContainer(long startContainerID, int count, + HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType replicationType, + ReplicationConfig replicationConfig, + Boolean suppressed) + throws IOException; + /** * Read meta data from an existing container. * @param containerID - ID of the container. @@ -466,11 +485,13 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Set or unset the ACK_MISSING state for a container. + * Suppress or unsuppress a container from reports. + * Suppressed containers are excluded from replication manager reports + * regardless of their health state. * * @param containerId The ID of the container. - * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. + * @param suppress true to suppress the container, false to unsuppress it. 
* @throws IOException */ - void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; + void suppressContainer(long containerId, boolean suppress) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index d45cc87d3444..bddf7b12744b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -228,6 +228,30 @@ ContainerListResult listContainer(long startContainerID, HddsProtos.ReplicationType replicationType, ReplicationConfig replicationConfig) throws IOException; + /** + * Ask SCM for a list of containers with a range of container ID, state + * and replication config, and the limit of count. + * The containers are returned from startID (exclusive), and + * filtered by state and replication config. The returned list is limited to + * count entries. + * + * @param startContainerID start container ID. + * @param count count, if count {@literal <} 0, the max size is unlimited.( + * Usually the count will be replace with a very big + * value instead of being unlimited in case the db is very big) + * @param state Container with this state will be returned. + * @param replicationConfig Replication config for the containers + * @param suppressed container to be suppressed/unsuppressed from report + * @return a list of containers capped by max count allowed + * in "ozone.scm.container.list.max.count" and total number of containers. 
+ * @throws IOException + */ + ContainerListResult listContainer(long startContainerID, + int count, HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType replicationType, + ReplicationConfig replicationConfig, + Boolean suppressed) throws IOException; + /** * Deletes a container in SCM. * @@ -523,11 +547,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Set or unset the ACK_MISSING state for a container. + * Suppress or unsuppress the container. * * @param containerId The ID of the container. - * @param acknowledge true to set ACK_MISSING, false to unset to MISSING. + * @param suppress true to suppress, false to unsuppress. * @throws IOException */ - void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException; + void suppressContainer(long containerId, boolean suppress) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 947d439d5b94..ecd7fac47171 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -115,7 +115,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest.Builder; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; -import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -125,6 +124,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SuppressContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; @@ -443,6 +443,16 @@ public ContainerListResult listContainer(long startContainerID, int count, HddsProtos.ReplicationType replicationType, ReplicationConfig replicationConfig) throws IOException { + return listContainer(startContainerID, count, state, replicationType, replicationConfig, null); + } + + @Override + public ContainerListResult listContainer(long startContainerID, int count, + HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType replicationType, + ReplicationConfig replicationConfig, + Boolean suppressed) + throws IOException { Preconditions.checkState(startContainerID >= 0, "Container ID cannot be negative."); Preconditions.checkState(count > 0, @@ -452,6 +462,9 @@ public ContainerListResult listContainer(long startContainerID, int count, builder.setStartContainerID(startContainerID); builder.setCount(count); 
builder.setTraceID(TracingUtil.exportCurrentSpan()); + if (suppressed != null) { + builder.setSuppressed(suppressed); + } if (state != null) { builder.setState(state); } @@ -1291,13 +1304,13 @@ public void reconcileContainer(long containerID) throws IOException { } @Override - public void setAckMissingContainer(long containerID, boolean acknowledge) + public void suppressContainer(long containerID, boolean suppress) throws IOException { - SetAckMissingContainerRequestProto request = SetAckMissingContainerRequestProto.newBuilder() + SuppressContainerRequestProto request = SuppressContainerRequestProto.newBuilder() .setContainerID(containerID) - .setAcknowledge(acknowledge) + .setSuppress(suppress) .build(); - submitRequest(Type.SetAckMissingContainer, builder -> builder.setSetAckMissingContainerRequest(request)); + submitRequest(Type.SuppressContainer, builder -> builder.setSuppressContainerRequest(request)); } /** diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 151a060de4b8..03f024df4daa 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -87,7 +87,7 @@ message ScmContainerLocationRequest { optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; optional GetDeletedBlocksTxnSummaryRequestProto getDeletedBlocksTxnSummaryRequest = 50; - optional SetAckMissingContainerRequestProto setAckMissingContainerRequest = 51; + optional SuppressContainerRequestProto suppressContainerRequest = 51; } message ScmContainerLocationResponse { @@ -146,7 +146,7 @@ message ScmContainerLocationResponse { optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; optional 
GetDeletedBlocksTxnSummaryResponseProto getDeletedBlocksTxnSummaryResponse = 50; - optional SetAckMissingContainerResponseProto setAckMissingContainerResponse = 51; + optional SuppressContainerResponseProto suppressContainerResponse = 51; enum Status { OK = 1; @@ -204,7 +204,7 @@ enum Type { GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; GetDeletedBlocksTransactionSummary = 46; - SetAckMissingContainer = 47; + SuppressContainer = 47; } /** @@ -302,6 +302,7 @@ message SCMListContainerRequestProto { optional ReplicationFactor factor = 5; optional ReplicationType type = 6; optional ECReplicationConfig ecReplicationConfig = 7; + optional bool suppressed = 8; } message SCMListContainerResponseProto { @@ -700,12 +701,12 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } -message SetAckMissingContainerRequestProto { +message SuppressContainerRequestProto { optional int64 containerID = 1; - optional bool acknowledge = 2; + optional bool suppress = 2; } -message SetAckMissingContainerResponseProto { +message SuppressContainerResponseProto { } /** diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index ddd4ee3e0324..eab367559273 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -271,7 +271,7 @@ message ContainerInfoProto { optional ReplicationFactor replicationFactor = 10; required ReplicationType replicationType = 11; optional ECReplicationConfig ecReplicationConfig = 12; - optional bool ackMissing = 13; + optional bool suppressed = 13; } message ContainerWithPipeline { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 056bd09d12bb..e5164e81a8e6 100644 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -568,20 +568,10 @@ public void updateContainerInfo(HddsProtos.ContainerInfoProto updatedInfoProto) if (currentInfo == null) { throw new ContainerNotFoundException(containerID); } - - // Only persist ACK_MISSING health state changes - // Other health states are dynamic and computed by ReplicationManager - ContainerHealthState newHealthState = updatedInfo.getHealthState(); - if (newHealthState == ContainerHealthState.ACK_MISSING) { - currentInfo.setHealthState(ContainerHealthState.ACK_MISSING); - LOG.debug("Persisting ACK_MISSING state for container: {}", containerID); - } else if (currentInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { - currentInfo.setHealthState(null); - LOG.debug("Clearing ACK_MISSING state for container: {}, new state: {}", - containerID, newHealthState); - } + // Update suppressed flag + currentInfo.setSuppressed(updatedInfo.isSuppressed()); transactionBuffer.addToBuffer(containerStore, containerID, currentInfo); - LOG.debug("Updated container info for container: {}, healthState={}", containerID, currentInfo.getHealthState()); + LOG.debug("Updated container info for container: {}, suppressed={}", containerID, currentInfo.isSuppressed()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index d1d317108c89..92a12b1d379c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -55,7 +55,6 @@ import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicationCommandPriority; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.PlacementPolicy; -import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -855,11 +854,9 @@ protected boolean processContainer(ContainerInfo containerInfo, ReplicationQueue repQueue, ReplicationManagerReport report, boolean readOnly) throws ContainerNotFoundException { synchronized (containerInfo) { - // Skip containers that are acknowledged as missing - // These containers are persisted with ACK_MISSING state and should not be - // processed by ReplicationManager until unacknowledged - if (containerInfo.getHealthState() == ContainerHealthState.ACK_MISSING) { - LOG.debug("Skipping ACK_MISSING container: {}", containerInfo.getContainerID()); + // Filter out suppressed containers early + if (containerInfo.isSuppressed()) { + LOG.debug("Skipping suppressed container: {}", containerInfo.getContainerID()); return false; } @@ -1577,4 +1574,3 @@ public synchronized boolean notifyNodeStateChange() { } } } - diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index d69c959a206e..f63b4702029d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -123,8 +123,6 @@ import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationRequest; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse.Status; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SetAckMissingContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SingleNodeQueryResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerRequestProto; @@ -137,6 +135,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SuppressContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SuppressContainerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -754,12 +754,11 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); - case SetAckMissingContainer: + case SuppressContainer: return 
ScmContainerLocationResponse.newBuilder() .setCmdType(request.getCmdType()) .setStatus(Status.OK) - .setSetAckMissingContainerResponse( - setAckMissingContainer(request.getSetAckMissingContainerRequest())) + .setSuppressContainerResponse(suppressContainer(request.getSuppressContainerRequest())) .build(); default: throw new IllegalArgumentException( @@ -890,6 +889,9 @@ public SCMListContainerResponseProto listContainer( } else if (request.hasFactor()) { factor = request.getFactor(); } + // Filter by suppressed: true (suppressed only), false (unsuppressed only) or null (display all). + Boolean suppressed = request.hasSuppressed() ? request.getSuppressed() : null; + ContainerListResult containerListAndTotalCount; if (factor != null) { // Call from a legacy client @@ -897,7 +899,7 @@ public SCMListContainerResponseProto listContainer( impl.listContainer(startContainerID, count, state, factor); } else { containerListAndTotalCount = - impl.listContainer(startContainerID, count, state, replicationType, repConfig); + impl.listContainer(startContainerID, count, state, replicationType, repConfig, suppressed); } SCMListContainerResponseProto.Builder builder = SCMListContainerResponseProto.newBuilder(); @@ -1410,9 +1412,8 @@ public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequ return ReconcileContainerResponseProto.getDefaultInstance(); } - public SetAckMissingContainerResponseProto setAckMissingContainer( - SetAckMissingContainerRequestProto request) throws IOException { - impl.setAckMissingContainer(request.getContainerID(), request.getAcknowledge()); - return SetAckMissingContainerResponseProto.getDefaultInstance(); + public SuppressContainerResponseProto suppressContainer(SuppressContainerRequestProto request) throws IOException { + impl.suppressContainer(request.getContainerID(), request.getSuppress()); + return SuppressContainerResponseProto.getDefaultInstance(); } } diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 0ad142ef8933..1ff6931722f3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -71,7 +71,6 @@ import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.FetchMetrics; import org.apache.hadoop.hdds.scm.ScmInfo; -import org.apache.hadoop.hdds.scm.container.ContainerHealthState; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerListResult; @@ -452,7 +451,7 @@ private boolean hasRequiredReplicas(ContainerInfo contInfo) { @Override public ContainerListResult listContainer(long startContainerID, int count) throws IOException { - return listContainer(startContainerID, count, null, null, null); + return listContainer(startContainerID, count, null, null, null, null); } /** @@ -469,7 +468,7 @@ public ContainerListResult listContainer(long startContainerID, @Override public ContainerListResult listContainer(long startContainerID, int count, HddsProtos.LifeCycleState state) throws IOException { - return listContainer(startContainerID, count, state, null, null); + return listContainer(startContainerID, count, state, null, null, null); } /** @@ -488,20 +487,27 @@ public ContainerListResult listContainer(long startContainerID, public ContainerListResult listContainer(long startContainerID, int count, HddsProtos.LifeCycleState state, HddsProtos.ReplicationFactor factor) throws IOException { - return listContainerInternal(startContainerID, count, state, factor, null, null); + return listContainerInternal(startContainerID, count, state, factor, null, null, null); } 
private ContainerListResult listContainerInternal(long startContainerID, int count, HddsProtos.LifeCycleState state, HddsProtos.ReplicationFactor factor, HddsProtos.ReplicationType replicationType, - ReplicationConfig repConfig) throws IOException { + ReplicationConfig repConfig, + Boolean suppressed) throws IOException { boolean auditSuccess = true; Map auditMap = buildAuditMap(startContainerID, count, state, factor, replicationType, repConfig); try { Stream containerStream = buildContainerStream(factor, replicationType, repConfig, getBaseContainerStream(state)); + + // Filter by suppressed flag only if explicitly specified + if (suppressed != null) { + containerStream = containerStream.filter(info -> info.isSuppressed() == suppressed); + } + List containerInfos = containerStream.filter(info -> info.containerID().getId() >= startContainerID) .sorted().collect(Collectors.toList()); @@ -589,7 +595,28 @@ public ContainerListResult listContainer(long startContainerID, int count, HddsProtos.LifeCycleState state, HddsProtos.ReplicationType replicationType, ReplicationConfig repConfig) throws IOException { - return listContainerInternal(startContainerID, count, state, null, replicationType, repConfig); + return listContainerInternal(startContainerID, count, state, null, replicationType, repConfig, null); + } + + /** + * Lists a range of containers and get their info. + * + * @param startContainerID start containerID. + * @param count count must be {@literal >} 0. + * @param state Container with this state will be returned. + * @param repConfig Replication Config for the container. + * @param suppressed container to be suppressed/unsuppressed from report + * @return a list of containers capped by max count allowed + * in "ozone.scm.container.list.max.count" and total number of containers. 
+ * @throws IOException + */ + @Override + public ContainerListResult listContainer(long startContainerID, + int count, HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType replicationType, + ReplicationConfig repConfig, + Boolean suppressed) throws IOException { + return listContainerInternal(startContainerID, count, state, null, replicationType, repConfig, suppressed); } @Override @@ -1698,37 +1725,19 @@ public void reconcileContainer(long longContainerID) throws IOException { } @Override - public void setAckMissingContainer(long longContainerID, boolean acknowledge) throws IOException { + public void suppressContainer(long longContainerID, boolean suppress) throws IOException { ContainerID containerID = ContainerID.valueOf(longContainerID); final Map auditMap = new HashMap<>(); auditMap.put("containerID", containerID.toString()); - auditMap.put("acknowledge", String.valueOf(acknowledge)); - + auditMap.put("suppress", String.valueOf(suppress)); + SCMAction action = suppress ? 
SCMAction.SUPPRESS_CONTAINER : SCMAction.UNSUPPRESS_CONTAINER; try { getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); - - if (acknowledge) { - // Validation for setting ACK_MISSING - Set replicas = scm.getContainerManager().getContainerReplicas(containerID); - if (replicas != null && !replicas.isEmpty()) { - throw new IOException("Container " + longContainerID + " has " + replicas.size() + - " replicas and cannot be acknowledged as missing"); - } - if (containerInfo.getNumberOfKeys() == 0) { - throw new IOException("Container " + longContainerID + " is empty (0 keys) and cannot be acknowledged."); - } - // Set to ACK_MISSING - containerInfo.setHealthState(ContainerHealthState.ACK_MISSING); - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.ACKNOWLEDGE_MISSING_CONTAINER, auditMap)); - } else { - containerInfo.setHealthState(null); - AUDIT.logWriteSuccess(buildAuditMessageForSuccess(SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER, auditMap)); - } + containerInfo.setSuppressed(suppress); scm.getContainerManager().updateContainerInfo(containerID, containerInfo.getProtobuf()); + AUDIT.logWriteSuccess(buildAuditMessageForSuccess(action, auditMap)); } catch (IOException ex) { - SCMAction action = acknowledge ? 
- SCMAction.ACKNOWLEDGE_MISSING_CONTAINER : SCMAction.UNACKNOWLEDGE_MISSING_CONTAINER; AUDIT.logWriteFailure(buildAuditMessageForFailure(action, auditMap, ex)); throw ex; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index e3f0be24a4f6..5e177543234d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -69,8 +69,8 @@ public enum SCMAction implements AuditAction { GET_PIPELINE, RECONCILE_CONTAINER, GET_DELETED_BLOCK_SUMMARY, - ACKNOWLEDGE_MISSING_CONTAINER, - UNACKNOWLEDGE_MISSING_CONTAINER; + SUPPRESS_CONTAINER, + UNSUPPRESS_CONTAINER; @Override public String getAction() { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index fdbbd6a92c49..14e52b688320 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -389,6 +389,22 @@ public ContainerListResult listContainer(long startContainerID, startContainerID, count, state, repType, replicationConfig); } + @Override + public ContainerListResult listContainer(long startContainerID, + int count, HddsProtos.LifeCycleState state, + HddsProtos.ReplicationType repType, + ReplicationConfig replicationConfig, + Boolean suppressed) throws IOException { + if (count > maxCountOfContainerList) { + LOG.warn("Attempting to list {} containers. However, this exceeds" + + " the cluster's current limit of {}. 
The results will be capped at the" + + " maximum allowed count.", count, maxCountOfContainerList); + count = maxCountOfContainerList; + } + return storageContainerLocationClient.listContainer( + startContainerID, count, state, repType, replicationConfig, suppressed); + } + @Override public ContainerDataProto readContainer(long containerID, Pipeline pipeline) throws IOException { @@ -616,7 +632,7 @@ public void reconcileContainer(long id) throws IOException { } @Override - public void setAckMissingContainer(long containerId, boolean acknowledge) throws IOException { - storageContainerLocationClient.setAckMissingContainer(containerId, acknowledge); + public void suppressContainer(long containerId, boolean suppress) throws IOException { + storageContainerLocationClient.suppressContainer(containerId, suppress); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java deleted file mode 100644 index 49cb86b9c9e3..000000000000 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/AckMissingSubcommand.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hdds.scm.cli.container; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.ContainerHealthState; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.ContainerListResult; -import picocli.CommandLine; - -/** - * Acknowledge missing container(s) to suppress them from Replication Manager Report. - */ -@CommandLine.Command( - name = "ack", - description = "Acknowledge missing container(s) to suppress them from reports", - mixinStandardHelpOptions = true, - versionProvider = HddsVersionProvider.class) -public class AckMissingSubcommand extends ScmSubcommand { - - @CommandLine.Parameters(description = "Container IDs to acknowledge (comma-separated)", - arity = "0..1") - private String containers; - - @CommandLine.Option(names = {"--list"}, - description = "List all acknowledged missing containers") - private boolean list; - - @Override - public void execute(ScmClient scmClient) throws IOException { - if (list) { - // List acknowledged containers - ContainerListResult result = scmClient.listContainer(1, Integer.MAX_VALUE); - for (ContainerInfo info : result.getContainerInfoList()) { - if (info.getHealthState() == ContainerHealthState.ACK_MISSING) { - out().println(info.getContainerID()); - } - } - } else if (containers != null && !containers.isEmpty()) { - // Acknowledge containers - Set ids = parseContainerIds(containers); - for (Long id : ids) { - try { - int replicaCount = scmClient.getContainerReplicas(id).size(); - if (replicaCount > 0) { - err().println("Cannot acknowledge container " + id + ": has " + 
replicaCount + " replica(s). " + - "Only containers with 0 replicas can be acknowledged as missing."); - continue; - } - - ContainerInfo containerInfo = scmClient.getContainer(id); - if (containerInfo.getNumberOfKeys() == 0) { - err().println("Cannot acknowledge container " + id + ": container is empty (0 keys). " + - "Empty containers are auto-deleted and don't need acknowledgement."); - continue; - } - - scmClient.setAckMissingContainer(id, true); - out().println("Acknowledged container: " + id); - } catch (IOException e) { - err().println("Failed to acknowledge container " + id + ": " + e.getMessage()); - } - } - } else { - throw new IllegalArgumentException( - "Either provide container IDs or use --list option"); - } - } - - private Set parseContainerIds(String input) { - return Arrays.stream(input.split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .map(Long::parseLong) - .collect(Collectors.toSet()); - } -} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java index aafe88415951..463b091f5502 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java @@ -38,8 +38,6 @@ ReportSubcommand.class, UpgradeSubcommand.class, ReconcileSubcommand.class, - AckMissingSubcommand.class, - UnackMissingSubcommand.class }) @MetaInfServices(AdminSubcommand.class) public class ContainerCommands implements AdminSubcommand { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java index 36e615a5829e..f41b08b3f9c3 100644 --- 
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java @@ -30,13 +30,22 @@ public class ContainerIDParameters extends ItemsFromStdin { private CommandLine.Model.CommandSpec spec; @CommandLine.Parameters(description = "Container IDs" + FORMAT_DESCRIPTION, - arity = "1..*", + arity = "0..*", paramLabel = "") public void setContainerIDs(List arguments) { setItems(arguments); } public List getValidatedIDs() { + return getValidatedIDs(true); + } + + public List getValidatedIDs(boolean required) { + if (required && size() == 0) { + throw new CommandLine.MissingParameterException(spec.commandLine(), + spec.commandLine().getCommandSpec().args(), + "Missing required parameter: ''"); + } List containerIDs = new ArrayList<>(size()); List invalidIDs = new ArrayList<>(); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListSubcommand.java index 4bc9af843a08..0cc4b8bdee1c 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListSubcommand.java @@ -86,6 +86,11 @@ public class ListSubcommand extends ScmSubcommand { "rs-6-3-1024k for EC)") private String replication; + @Option(names = {"--suppressed"}, + description = "Filter by suppression status. 
Set to 'true' to list only suppressed containers " + + "or 'false' to list only those that are not suppressed.") + private Boolean suppressed; + private static final ObjectWriter WRITER; static { @@ -129,7 +134,7 @@ public void execute(ScmClient scmClient) throws IOException { } ContainerListResult containerListResult = - scmClient.listContainer(startId, count, state, type, repConfig); + scmClient.listContainer(startId, count, state, type, repConfig, suppressed); writeContainers(sequenceWriter, containerListResult.getContainerInfoList()); @@ -169,7 +174,7 @@ private void listAllContainers(ScmClient scmClient, SequenceWriter writer, do { ContainerListResult result = - scmClient.listContainer(currentStartId, batchSize, state, type, repConfig); + scmClient.listContainer(currentStartId, batchSize, state, type, repConfig, suppressed); fetchedCount = result.getContainerInfoList().size(); writeContainers(writer, result.getContainerInfoList()); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java index b5dc960d8c75..be9bba41beaa 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java @@ -44,6 +44,30 @@ public class ReportSubcommand extends ScmSubcommand { @CommandLine.Spec private CommandLine.Model.CommandSpec spec; + @CommandLine.ArgGroup(exclusive = true, multiplicity = "0..1") + private SuppressOptions suppressOptions; + + static class SuppressOptions { + @CommandLine.Option(names = {"--suppress"}, + description = "Suppress container(s) from future reports") + private boolean suppress; + + @CommandLine.Option(names = {"--unsuppress"}, + description = "Unsuppress container(s) to include in future reports") + private boolean unsuppress; + + public 
boolean isSuppress() { + return suppress; + } + + public boolean isUnsuppress() { + return unsuppress; + } + } + + @CommandLine.Mixin + private ContainerIDParameters containerList; + @CommandLine.Option(names = { "--json" }, defaultValue = "false", description = "Format output as JSON") @@ -51,6 +75,42 @@ public class ReportSubcommand extends ScmSubcommand { @Override public void execute(ScmClient scmClient) throws IOException { + if (suppressOptions != null) { + handleSuppressUnsuppress(scmClient); + return; + } + + printReport(scmClient); + } + + private void handleSuppressUnsuppress(ScmClient scmClient) throws IOException { + boolean suppress = suppressOptions.isSuppress(); + List containerIDs = containerList.getValidatedIDs(); + + int failures = 0; + for (long id : containerIDs) { + try { + scmClient.suppressContainer(id, suppress); + out().println((suppress ? "Suppressed" : "Unsuppressed") + " container: " + id); + } catch (IOException e) { + err().println("Failed to " + (suppress ? "suppress" : "unsuppress") + + " container " + id + ": " + e.getMessage()); + failures++; + } + } + + int numOfSuccess = containerIDs.size() - failures; + if (numOfSuccess > 0) { + out().println("\n" + (suppress ? "Suppressed " : "Unsuppressed ") + numOfSuccess + " container(s) successfully."); + out().println("Container report will be updated after the next Replication Manager cycle."); + } + + if (failures > 0) { + throw new IOException("Failed to " + (suppress ? 
"suppress" : "unsuppress") + failures + " container(s)."); + } + } + + private void printReport(ScmClient scmClient) throws IOException { ReplicationManagerReport report = scmClient.getReplicationManagerReport(); if (report.getReportTimeStamp() == 0) { System.err.println("The Container Report is not available until Replication Manager completes" + diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java deleted file mode 100644 index bafaedcdb184..000000000000 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/UnackMissingSubcommand.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hdds.scm.cli.container; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.ContainerHealthState; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import picocli.CommandLine; - -/** - * Unacknowledge missing container(s) to report them again. - */ -@CommandLine.Command( - name = "unack", - description = "Unacknowledge missing container(s) to report them again", - mixinStandardHelpOptions = true, - versionProvider = HddsVersionProvider.class) -public class UnackMissingSubcommand extends ScmSubcommand { - - @CommandLine.Parameters(description = "Container IDs to unacknowledge (comma-separated)") - private String containers; - - @Override - public void execute(ScmClient scmClient) throws IOException { - if (containers == null || containers.isEmpty()) { - throw new IllegalArgumentException( - "Container IDs must be provided"); - } - - Set ids = parseContainerIds(containers); - for (Long id : ids) { - try { - ContainerInfo containerInfo = scmClient.getContainer(id); - if (containerInfo.getHealthState() != ContainerHealthState.ACK_MISSING) { - err().println("Cannot unacknowledge container " + id + ": " + - "Only acknowledged missing containers can be unacknowledged."); - continue; - } - scmClient.setAckMissingContainer(id, false); - out().println("Unacknowledged container: " + id); - } catch (IOException e) { - err().println("Failed to unacknowledge container " + id + ": " + e.getMessage()); - } - } - } - - private Set parseContainerIds(String input) { - return Arrays.stream(input.split(",")) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .map(Long::parseLong) - .collect(Collectors.toSet()); - } -} diff --git 
a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java deleted file mode 100644 index dc75850a58a4..000000000000 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestAckMissingContainerSubcommand.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hdds.scm.cli.container; - -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; -import org.apache.hadoop.hdds.client.RatisReplicationConfig; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.ContainerHealthState; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.ContainerListResult; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import picocli.CommandLine; - -/** - * Tests for AckMissingContainerSubcommand. 
- */ -public class TestAckMissingContainerSubcommand { - - private ScmClient scmClient; - private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); - private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); - private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); - - @BeforeEach - public void setup() throws IOException { - scmClient = mock(ScmClient.class); - - System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); - System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); - } - - @Test - public void testAckMissingContainer() throws Exception { - ContainerInfo container = mockContainer(1, false); - when(scmClient.getContainer(1L)).thenReturn(container); - - AckMissingSubcommand cmd = new AckMissingSubcommand(); - new CommandLine(cmd).parseArgs("1"); - cmd.execute(scmClient); - verify(scmClient, times(1)).setAckMissingContainer(1L, true); - - String output = outContent.toString(DEFAULT_ENCODING); - assertThat(output).contains("Acknowledged container: 1"); - } - - @Test - public void testListAcknowledgedContainers() throws Exception { - ContainerInfo container1 = mockContainer(1, true); - ContainerInfo container2 = mockContainer(2, false); - - List allContainers = Arrays.asList(container1, container2); - ContainerListResult result = new ContainerListResult(allContainers, 2); - when(scmClient.listContainer(anyLong(), anyInt())).thenReturn(result); - - AckMissingSubcommand cmd = new AckMissingSubcommand(); - new CommandLine(cmd).parseArgs("--list"); - cmd.execute(scmClient); - - String output = outContent.toString(DEFAULT_ENCODING); - assertThat(output).contains("1"); - assertThat(output).doesNotContain("2"); - } - - @Test - public void testUnacknowledgeMissingContainer() throws Exception { - ContainerInfo container = mockContainer(1, true); - when(scmClient.getContainer(1L)).thenReturn(container); - - UnackMissingSubcommand cmd = new UnackMissingSubcommand(); - new 
CommandLine(cmd).parseArgs("1"); - cmd.execute(scmClient); - verify(scmClient, times(1)).setAckMissingContainer(1L, false); - - String output = outContent.toString(DEFAULT_ENCODING); - assertThat(output).contains("Unacknowledged container: 1"); - } - - private ContainerInfo mockContainer(long containerID, boolean ackMissing) { - ContainerHealthState healthState = ackMissing ? - ContainerHealthState.ACK_MISSING : ContainerHealthState.MISSING; - return new ContainerInfo.Builder() - .setContainerID(containerID) - .setState(OPEN) - .setHealthState(healthState) - .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) - .setNumberOfKeys(1) - .build(); - } -} diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java new file mode 100644 index 000000000000..872429efbb82 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java @@ -0,0 +1,298 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.protocol.DatanodeID; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerListResult; +import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestMethodOrder; +import picocli.CommandLine; + +/** + * Tests the complete workflow of suppressing empty containers: + * 1. Assert RM report shows EMPTY/MISSING containers + * 2. Suppress a MISSING container + * 3. List only suppressed containers + * 4. Unsuppress the container + * 5. 
List non-suppressed containers + */ +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class TestContainerReportSuppressOptions { + + private ScmClient scmClient; + private ReportSubcommand reportCmd; + private List containers; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + @BeforeEach + public void setup() throws IOException { + scmClient = mock(ScmClient.class); + MockDatanodeDetails.createDatanodeDetails(DatanodeID.randomID()); + containers = new ArrayList<>(); + + containers.add(createEmptyContainer(1L)); // EMPTY container (0 keys, no replicas) + containers.add(createMissingContainer(2L)); // MISSING container (has keys, no replicas) + + // Mock RM report + when(scmClient.getReplicationManagerReport()).thenAnswer(inv -> createMockReport()); + + // Mock listContainer + when(scmClient.listContainer(anyLong(), anyInt(), eq(null), eq(null), eq(null), eq(true))) + .thenAnswer(inv -> listSuppressedContainers()); + when(scmClient.listContainer(anyLong(), anyInt(), eq(null), eq(null), eq(null), eq(false))) + .thenAnswer(inv -> listNonSuppressedContainers()); + + // Mock suppress/unsuppress + doNothing().when(scmClient).suppressContainer(anyLong(), eq(true)); + doNothing().when(scmClient).suppressContainer(anyLong(), eq(false)); + + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + } + + /** + * Test container report shows empty and missing containers. 
+ */ + @Test + @Order(1) + public void testReportShowsEmptyAndMissingContainers() throws IOException { + reportCmd = new ReportSubcommand(); + CommandLine c = new CommandLine(reportCmd); + c.parseArgs(); + reportCmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("EMPTY: 1")); + assertTrue(output.contains("MISSING: 1")); + } + + /** + * Test suppress missing container and check report. + */ + @Test + @Order(2) + public void testSuppressMissingContainer() throws IOException { + outContent.reset(); + reportCmd = new ReportSubcommand(); + CommandLine c = new CommandLine(reportCmd); + c.parseArgs("--suppress", "2"); + reportCmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Suppressed container: 2")); + + containers.get(1).setSuppressed(true); + + outContent.reset(); + reportCmd = new ReportSubcommand(); + c = new CommandLine(reportCmd); + c.parseArgs(); + reportCmd.execute(scmClient); + + output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("EMPTY: 1")); + assertTrue(output.contains("MISSING: 0")); + } + + /** + * Test list suppressed containers from container list command. + */ + @Test + @Order(3) + public void testListSuppressedContainers() throws IOException { + containers.get(1).setSuppressed(true); + + ListSubcommand listCmd = new ListSubcommand(); + CommandLine c = new CommandLine(listCmd); + c.parseArgs("--suppressed=true"); + + outContent.reset(); + listCmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("\"containerID\" : 2")); + assertFalse(output.contains("\"containerID\" : 1")); + } + + /** + * Test unsuppress missing container and check report. 
+ */ + @Test + @Order(4) + public void testUnsuppressContainer() throws IOException { + containers.get(1).setSuppressed(true); + + outContent.reset(); + reportCmd = new ReportSubcommand(); + CommandLine c = new CommandLine(reportCmd); + c.parseArgs("--unsuppress", "2"); + reportCmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Unsuppressed container: 2")); + + containers.get(1).setSuppressed(false); + + outContent.reset(); + reportCmd = new ReportSubcommand(); + c = new CommandLine(reportCmd); + c.parseArgs(); + reportCmd.execute(scmClient); + + output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("MISSING: 1")); + } + + /** + * Test list unsuppressed containers from container list command. + */ + @Test + @Order(5) + public void testListNonSuppressedContainers() throws IOException { + containers.get(1).setSuppressed(false); + + // List only non-suppressed containers + ListSubcommand listCmd = new ListSubcommand(); + CommandLine c = new CommandLine(listCmd); + c.parseArgs("--suppressed=false"); + + outContent.reset(); + listCmd.execute(scmClient); + + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("\"containerID\" : 1")); + assertTrue(output.contains("\"containerID\" : 2")); + assertFalse(output.contains("\"suppressed\" : true")); + } + + /** + * Create mock RM report based on current container states. 
+ */ + private ReplicationManagerReport createMockReport() { + ReplicationManagerReport report = new ReplicationManagerReport(100); + + for (ContainerInfo container : containers) { + if (container.isSuppressed()) { + // Suppressed containers are filtered from the report + continue; + } + + // Determine health state based on container properties + ContainerHealthState healthState; + if (container.getNumberOfKeys() == 0) { + healthState = ContainerHealthState.EMPTY; + report.incrementAndSample(healthState, container); + } else { + healthState = ContainerHealthState.MISSING; + report.incrementAndSample(healthState, container); + } + + // Also increment lifecycle state + report.increment(container.getState()); + } + + report.setComplete(); + return report; + } + + /** + * Create an EMPTY container (0 keys, no replicas). + */ + private ContainerInfo createEmptyContainer(long containerID) { + return new ContainerInfo.Builder() + .setContainerID(containerID) + .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) + .setState(CLOSED) + .setOwner("TestOwner") + .setNumberOfKeys(0) // Empty - 0 keys + .setPipelineID(PipelineID.randomId()) + .build(); + } + + /** + * Create a MISSING container (has keys but no replicas). 
+ */ + private ContainerInfo createMissingContainer(long containerID) { + return new ContainerInfo.Builder() + .setContainerID(containerID) + .setReplicationConfig(RatisReplicationConfig.getInstance(ONE)) + .setState(CLOSED) + .setOwner("TestOwner") + .setNumberOfKeys(100) // Has keys + .setPipelineID(PipelineID.randomId()) + .build(); + } + + private ContainerListResult listSuppressedContainers() { + List suppressed = new ArrayList<>(); + for (ContainerInfo container : containers) { + if (container.isSuppressed()) { + suppressed.add(container); + } + } + return new ContainerListResult(suppressed, suppressed.size()); + } + + private ContainerListResult listNonSuppressedContainers() { + List nonSuppressed = new ArrayList<>(); + for (ContainerInfo container : containers) { + if (!container.isSuppressed()) { + nonSuppressed.add(container); + } + } + return new ContainerListResult(nonSuppressed, nonSuppressed.size()); + } +} From 967a4214cf92092618256475a4486d227dac1047 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 26 Mar 2026 12:21:21 +0530 Subject: [PATCH 7/9] Fix pmd failures --- .../hdds/scm/cli/container/ReportSubcommand.java | 16 ++++++++-------- .../TestContainerReportSuppressOptions.java | 7 +++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java index be9bba41beaa..460a08e3d452 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java @@ -47,6 +47,14 @@ public class ReportSubcommand extends ScmSubcommand { @CommandLine.ArgGroup(exclusive = true, multiplicity = "0..1") private SuppressOptions suppressOptions; + @CommandLine.Mixin + private ContainerIDParameters containerList; + + 
@CommandLine.Option(names = { "--json" }, + defaultValue = "false", + description = "Format output as JSON") + private boolean json; + static class SuppressOptions { @CommandLine.Option(names = {"--suppress"}, description = "Suppress container(s) from future reports") @@ -65,14 +73,6 @@ public boolean isUnsuppress() { } } - @CommandLine.Mixin - private ContainerIDParameters containerList; - - @CommandLine.Option(names = { "--json" }, - defaultValue = "false", - description = "Format output as JSON") - private boolean json; - @Override public void execute(ScmClient scmClient) throws IOException { if (suppressOptions != null) { diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java index 872429efbb82..c1cf25627e09 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java @@ -63,7 +63,6 @@ public class TestContainerReportSuppressOptions { private ScmClient scmClient; - private ReportSubcommand reportCmd; private List containers; private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); @@ -109,7 +108,7 @@ public void tearDown() { @Test @Order(1) public void testReportShowsEmptyAndMissingContainers() throws IOException { - reportCmd = new ReportSubcommand(); + ReportSubcommand reportCmd = new ReportSubcommand(); CommandLine c = new CommandLine(reportCmd); c.parseArgs(); reportCmd.execute(scmClient); @@ -126,7 +125,7 @@ public void testReportShowsEmptyAndMissingContainers() throws IOException { @Order(2) public void testSuppressMissingContainer() throws IOException { outContent.reset(); - 
reportCmd = new ReportSubcommand(); + ReportSubcommand reportCmd = new ReportSubcommand(); CommandLine c = new CommandLine(reportCmd); c.parseArgs("--suppress", "2"); reportCmd.execute(scmClient); @@ -176,7 +175,7 @@ public void testUnsuppressContainer() throws IOException { containers.get(1).setSuppressed(true); outContent.reset(); - reportCmd = new ReportSubcommand(); + ReportSubcommand reportCmd = new ReportSubcommand(); CommandLine c = new CommandLine(reportCmd); c.parseArgs("--unsuppress", "2"); reportCmd.execute(scmClient); From 3486fd4b0a832b710b1ac7457c79b39271769e42 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 26 Mar 2026 18:14:51 +0530 Subject: [PATCH 8/9] Remove unnecessary changes and fix tests --- .../hadoop/hdds/cli/ItemsFromStdin.java | 20 +++++++++++++------ ...ocationProtocolClientSideTranslatorPB.java | 2 +- .../replication/ReplicationManager.java | 1 + .../health/ClosingContainerHandler.java | 4 ++-- .../health/TestClosingContainerHandler.java | 2 +- .../scm/cli/container/ContainerCommands.java | 2 +- .../cli/container/ContainerIDParameters.java | 2 +- .../TestContainerReportSuppressOptions.java | 7 +------ 8 files changed, 22 insertions(+), 18 deletions(-) diff --git a/hadoop-hdds/cli-common/src/main/java/org/apache/hadoop/hdds/cli/ItemsFromStdin.java b/hadoop-hdds/cli-common/src/main/java/org/apache/hadoop/hdds/cli/ItemsFromStdin.java index 79bad7ed7328..8d096ab3573f 100644 --- a/hadoop-hdds/cli-common/src/main/java/org/apache/hadoop/hdds/cli/ItemsFromStdin.java +++ b/hadoop-hdds/cli-common/src/main/java/org/apache/hadoop/hdds/cli/ItemsFromStdin.java @@ -33,9 +33,21 @@ public abstract class ItemsFromStdin implements Iterable { ": one or more, separated by spaces. 
To read from stdin, specify '-' and supply one item per line."; private List items = new ArrayList<>(); + private boolean readFromStdin = false; protected void setItems(List arguments) { - items = readItemsFromStdinIfNeeded(arguments); + readFromStdin = arguments != null && !arguments.isEmpty() && + "-".equals(arguments.iterator().next()); + + if (readFromStdin) { + items = readItemsFromStdin(); + } else { + items = arguments == null ? new ArrayList<>() : arguments; + } + } + + public boolean isReadFromStdin() { + return readFromStdin; } public List getItems() { @@ -52,11 +64,7 @@ public int size() { return items.size(); } - private static List readItemsFromStdinIfNeeded(List parameters) { - if (parameters.isEmpty() || !"-".equals(parameters.iterator().next())) { - return parameters; - } - + private static List readItemsFromStdin() { List items = new ArrayList<>(); Scanner scanner = new Scanner(System.in, StandardCharsets.UTF_8.name()); while (scanner.hasNextLine()) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index eaabee49c0a1..2b817476bdb6 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -224,7 +224,7 @@ private ScmContainerLocationResponse submitRequest( private ScmContainerLocationResponse submitRpcRequest( ScmContainerLocationRequest wrapper) throws ServiceException { // If targetScmNode has a specific node ID, route follower-readable requests to that node - if (targetScmNode != null && targetScmNode.hasNodeId() && + if (targetScmNode != null && targetScmNode.hasNodeId() && 
FOLLOWER_READABLE_COMMAND_TYPES.contains(wrapper.getCmdType())) { try { StorageContainerLocationProtocolPB proxy = fpp.getProxyForNode(targetScmNode.getNodeId()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 29740d3e7fe2..8cd8444d1d2f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -1620,3 +1620,4 @@ public synchronized boolean notifyNodeStateChange() { } } } + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java index 2f1e5bdf897d..a09f5079ffe5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosingContainerHandler.java @@ -68,8 +68,8 @@ public boolean handle(ContainerCheckRequest request) { boolean forceClose = containerInfo.getReplicationConfig() .getReplicationType() != ReplicationType.RATIS; - // Report MISSING only for containers with no replicas and keys > 0 - if (request.getContainerReplicas().isEmpty() && containerInfo.getNumberOfKeys() > 0) { + // TODO - review this logic - may need an empty check here + if (request.getContainerReplicas().isEmpty()) { request.getReport().incrementAndSample(ContainerHealthState.MISSING, containerInfo); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java index d75b2d982514..dca89171f0e3 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosingContainerHandler.java @@ -213,7 +213,7 @@ public void testClosingContainerStateIsNotUpdatedWhenThereAreReplicas() { @Test public void testClosingContainerStateIsUpdatedWhenThereAreNotReplicas() { ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( - RATIS_REPLICATION_CONFIG, 1, CLOSING, 1, 10); + RATIS_REPLICATION_CONFIG, 1, CLOSING); Set containerReplicas = new HashSet<>(); ReplicationManagerReport report = new ReplicationManagerReport(rmConf.getContainerSampleLimit()); ContainerCheckRequest request = new ContainerCheckRequest.Builder() diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java index 463b091f5502..b340c4077f44 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommands.java @@ -37,7 +37,7 @@ CloseSubcommand.class, ReportSubcommand.class, UpgradeSubcommand.class, - ReconcileSubcommand.class, + ReconcileSubcommand.class }) @MetaInfServices(AdminSubcommand.class) public class ContainerCommands implements AdminSubcommand { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java index f41b08b3f9c3..985adad58992 100644 --- 
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java @@ -41,7 +41,7 @@ public List getValidatedIDs() { } public List getValidatedIDs(boolean required) { - if (required && size() == 0) { + if (required && size() == 0 && !isReadFromStdin()) { throw new CommandLine.MissingParameterException(spec.commandLine(), spec.commandLine().getCommandSpec().args(), "Missing required parameter: ''"); diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java index c1cf25627e09..baac9b828b0e 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java @@ -52,12 +52,7 @@ import picocli.CommandLine; /** - * Tests the complete workflow of suppressing empty containers: - * 1. Assert RM report shows EMPTY/MISSING containers - * 2. Suppress a MISSING container - * 3. List only suppressed containers - * 4. Unsuppress the container - * 5. List non-suppressed containers + * Tests the suppress/unsuppress options in container report. 
*/ @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class TestContainerReportSuppressOptions { From 89a069167726bfbc0f19d037c10c315d00f4bbe6 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Wed, 1 Apr 2026 20:30:51 +0530 Subject: [PATCH 9/9] Add batch processing and update audit map --- .../hadoop/hdds/scm/client/ScmClient.java | 8 +++--- .../StorageContainerLocationProtocol.java | 8 +++--- ...ocationProtocolClientSideTranslatorPB.java | 10 +++++-- .../src/main/proto/ScmAdminProtocol.proto | 4 ++- ...ocationProtocolServerSideTranslatorPB.java | 6 ++-- .../scm/server/SCMClientProtocolServer.java | 28 +++++++++++++++---- .../scm/cli/ContainerOperationClient.java | 4 +-- .../cli/container/ContainerIDParameters.java | 3 +- .../scm/cli/container/ReportSubcommand.java | 23 +++++---------- .../TestContainerReportSuppressOptions.java | 8 ++++-- 10 files changed, 61 insertions(+), 41 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index e5456226c8cd..cb4b8471a00c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -485,13 +485,13 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Suppress or unsuppress a container from reports. + * Suppress or unsuppress containers from reports. * Suppressed containers are excluded from replication manager reports * regardless of their health state. * - * @param containerId The ID of the container. - * @param suppress true to suppress the container, false to unsuppress it. 
+ * @param containerIds container IDs to suppress or unsuppress + * @param suppress true to suppress, false to unsuppress * @throws IOException */ - void suppressContainer(long containerId, boolean suppress) throws IOException; + List suppressContainers(List containerIds, boolean suppress) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 890d45380654..98f8efa9ae30 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -547,11 +547,11 @@ DecommissionScmResponseProto decommissionScm( void reconcileContainer(long containerID) throws IOException; /** - * Suppress or unsuppress the container. + * Suppress or unsuppress containers from reports. * - * @param containerId The ID of the container. - * @param suppress true to suppress, false to unsuppress. 
+ * @param containerIds container IDs to suppress or unsuppress + * @param suppress true to suppress, false to unsuppress * @throws IOException */ - void suppressContainer(long containerId, boolean suppress) throws IOException; + List suppressContainers(List containerIds, boolean suppress) throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 2b817476bdb6..56e2ef6408fc 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -128,6 +128,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopContainerBalancerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SuppressContainerRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SuppressContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.ScmInfo; @@ -1327,13 +1328,16 @@ public void reconcileContainer(long containerID) throws IOException { } @Override - public void suppressContainer(long containerID, boolean suppress) + public List suppressContainers(List containerIds, boolean suppress) throws IOException { SuppressContainerRequestProto request = SuppressContainerRequestProto.newBuilder() - .setContainerID(containerID) + 
.addAllContainerIDs(containerIds) .setSuppress(suppress) .build(); - submitRequest(Type.SuppressContainer, builder -> builder.setSuppressContainerRequest(request)); + SuppressContainerResponseProto response = + submitRequest(Type.SuppressContainer, builder -> builder.setSuppressContainerRequest(request)) + .getSuppressContainerResponse(); + return response.getFailedContainerIDsList(); } /** diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index d361d4162e04..933bb4a00870 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -716,11 +716,13 @@ message ReconcileContainerResponseProto { } message SuppressContainerRequestProto { - optional int64 containerID = 1; + repeated int64 containerIDs = 1; optional bool suppress = 2; } message SuppressContainerResponseProto { + // Container IDs that failed to suppress or unsuppress. Empty if all succeeded. 
+ repeated int64 failedContainerIDs = 1; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 8ed26e4f4b73..73bf92e9cd58 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -1447,7 +1447,9 @@ public SCMListContainerIDsResponseProto listContainerIDs( } public SuppressContainerResponseProto suppressContainer(SuppressContainerRequestProto request) throws IOException { - impl.suppressContainer(request.getContainerID(), request.getSuppress()); - return SuppressContainerResponseProto.getDefaultInstance(); + List failedContainerIDs = impl.suppressContainers(request.getContainerIDsList(), request.getSuppress()); + return SuppressContainerResponseProto.newBuilder() + .addAllFailedContainerIDs(failedContainerIDs) + .build(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 229d30bde5cc..1217215734de 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -497,7 +497,8 @@ private ContainerListResult listContainerInternal(long startContainerID, int cou ReplicationConfig repConfig, Boolean suppressed) throws IOException { boolean auditSuccess = true; - Map auditMap = buildAuditMap(startContainerID, count, state, factor, replicationType, repConfig); + Map auditMap = 
buildAuditMap(startContainerID, count, state, factor, + replicationType, repConfig, suppressed); try { Stream containerStream = @@ -559,7 +560,8 @@ private Map buildAuditMap(long startContainerID, int count, HddsProtos.LifeCycleState state, HddsProtos.ReplicationFactor factor, HddsProtos.ReplicationType replicationType, - ReplicationConfig repConfig) { + ReplicationConfig repConfig, + Boolean suppressed) { Map auditMap = new HashMap<>(); auditMap.put("startContainerID", String.valueOf(startContainerID)); auditMap.put("count", String.valueOf(count)); @@ -575,6 +577,9 @@ private Map buildAuditMap(long startContainerID, int count, if (repConfig != null) { auditMap.put("replicationConfig", repConfig.toString()); } + if (suppressed != null) { + auditMap.put("suppressed", suppressed.toString()); + } return auditMap; } @@ -1725,14 +1730,27 @@ public void reconcileContainer(long longContainerID) throws IOException { } @Override - public void suppressContainer(long longContainerID, boolean suppress) throws IOException { + public List suppressContainers(List containerIds, boolean suppress) throws IOException { + getScm().checkAdminAccess(getRemoteUser(), false); + SCMAction action = suppress ? SCMAction.SUPPRESS_CONTAINER : SCMAction.UNSUPPRESS_CONTAINER; + List failedContainerIDs = new ArrayList<>(); + for (long containerId : containerIds) { + try { + persistContainerSuppression(containerId, suppress, action); + } catch (IOException ex) { + failedContainerIDs.add(containerId); + } + } + return failedContainerIDs; + } + + private void persistContainerSuppression(long longContainerID, boolean suppress, SCMAction action) + throws IOException { ContainerID containerID = ContainerID.valueOf(longContainerID); final Map auditMap = new HashMap<>(); auditMap.put("containerID", containerID.toString()); auditMap.put("suppress", String.valueOf(suppress)); - SCMAction action = suppress ? 
SCMAction.SUPPRESS_CONTAINER : SCMAction.UNSUPPRESS_CONTAINER; try { - getScm().checkAdminAccess(getRemoteUser(), false); ContainerInfo containerInfo = scm.getContainerManager().getContainer(containerID); containerInfo.setSuppressed(suppress); scm.getContainerManager().updateContainerInfo(containerID, containerInfo.getProtobuf()); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 14e52b688320..c1973891d0aa 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -632,7 +632,7 @@ public void reconcileContainer(long id) throws IOException { } @Override - public void suppressContainer(long containerId, boolean suppress) throws IOException { - storageContainerLocationClient.suppressContainer(containerId, suppress); + public List suppressContainers(List containerIds, boolean suppress) throws IOException { + return storageContainerLocationClient.suppressContainers(containerIds, suppress); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java index 985adad58992..a201cc2ab9fe 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerIDParameters.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.cli.container; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; import org.apache.hadoop.hdds.cli.ItemsFromStdin; import picocli.CommandLine; @@ -71,6 +72,6 @@ public List getValidatedIDs(boolean 
required) { throw new CommandLine.ParameterException(spec.commandLine(), "Container IDs must be positive integers. Invalid container IDs: " + String.join(" ", invalidIDs)); } - return containerIDs; + return new ArrayList<>(new LinkedHashSet<>(containerIDs)); } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java index 460a08e3d452..737ed248dd76 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReportSubcommand.java @@ -63,14 +63,6 @@ static class SuppressOptions { @CommandLine.Option(names = {"--unsuppress"}, description = "Unsuppress container(s) to include in future reports") private boolean unsuppress; - - public boolean isSuppress() { - return suppress; - } - - public boolean isUnsuppress() { - return unsuppress; - } } @Override @@ -84,18 +76,17 @@ public void execute(ScmClient scmClient) throws IOException { } private void handleSuppressUnsuppress(ScmClient scmClient) throws IOException { - boolean suppress = suppressOptions.isSuppress(); + boolean suppress = suppressOptions.suppress; List containerIDs = containerList.getValidatedIDs(); + List failedContainerIDs = scmClient.suppressContainers(containerIDs, suppress); int failures = 0; for (long id : containerIDs) { - try { - scmClient.suppressContainer(id, suppress); - out().println((suppress ? "Suppressed" : "Unsuppressed") + " container: " + id); - } catch (IOException e) { - err().println("Failed to " + (suppress ? "suppress" : "unsuppress") + - " container " + id + ": " + e.getMessage()); + if (failedContainerIDs.contains(id)) { + err().println("Failed to " + (suppress ? "suppress" : "unsuppress") + " container " + id + "."); failures++; + } else { + out().println((suppress ? 
"Suppressed" : "Unsuppressed") + " container: " + id); } } @@ -106,7 +97,7 @@ private void handleSuppressUnsuppress(ScmClient scmClient) throws IOException { } if (failures > 0) { - throw new IOException("Failed to " + (suppress ? "suppress" : "unsuppress") + failures + " container(s)."); + throw new IOException("Failed to " + (suppress ? "suppress " : "unsuppress ") + failures + " container(s)."); } } diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java index baac9b828b0e..c315847ee237 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/container/TestContainerReportSuppressOptions.java @@ -22,9 +22,10 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -33,6 +34,7 @@ import java.io.PrintStream; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeID; @@ -84,8 +86,8 @@ public void setup() throws IOException { .thenAnswer(inv -> listNonSuppressedContainers()); // Mock suppress/unsuppress - doNothing().when(scmClient).suppressContainer(anyLong(), eq(true)); - 
doNothing().when(scmClient).suppressContainer(anyLong(), eq(false)); + doReturn(Collections.emptyList()).when(scmClient).suppressContainers(anyList(), eq(true)); + doReturn(Collections.emptyList()).when(scmClient).suppressContainers(anyList(), eq(false)); System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING));