@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs;

import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

@@ -30,12 +29,7 @@
import java.util.List;
import java.util.Map;

-import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
-import org.junit.Assert;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
@@ -48,8 +42,16 @@
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Time;
+import org.junit.Assert;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Supplier;
+import com.google.common.collect.Lists;

/**
 * This class tests node maintenance.
@@ -125,8 +127,8 @@ public void testTakeNodeOutOfEnteringMaintenance() throws Exception {

    // When node is in ENTERING_MAINTENANCE state, it can still serve read
    // requests
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, null,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas, null,
+        nodeOutofService);

    putNodeInService(0, nodeOutofService.getDatanodeUuid());

@@ -387,8 +389,8 @@ private void testExpectedReplication(int replicationFactor,

    // The block should be replicated to another datanode to meet
    // expected replication count.
-    assertNull(checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, expectedReplicasInRead,
+        nodeOutofService);

    cleanupFile(fileSys, file);
    teardown();
@@ -548,19 +550,19 @@ public void testTransitionToDecommission() throws IOException {
        client.datanodeReport(DatanodeReportType.LIVE).length);

    // test 1, verify the replica in IN_MAINTENANCE state isn't in LocatedBlock
-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), 0, null,
        AdminStates.DECOMMISSIONED);

    // test 2 after decommission has completed, the replication count is
    // replicas + 1 which includes the decommissioned node.
-    assertNull(checkWithRetry(ns, fileSys, file, replicas + 1, null));
+    checkWithRetry(ns, fileSys, file, replicas + 1, null);

    // test 3, put the node in service, replication count should restore.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+    checkWithRetry(ns, fileSys, file, replicas, null);

    cleanupFile(fileSys, file);
  }
@@ -587,8 +589,8 @@ public void testTransitionFromDecommissioning() throws IOException {
    takeNodeOutofService(0, nodeOutofService.getDatanodeUuid(), Long.MAX_VALUE,
        null, AdminStates.IN_MAINTENANCE);

-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    cleanupFile(fileSys, file);
  }
@@ -631,10 +633,10 @@ private void testDecommissionDifferentNodeAfterMaintenance(int repl)
    takeNodeOutofService(0, decommissionDNUuid, 0, null, maintenanceNodes,
        AdminStates.DECOMMISSIONED);
    // Out of the replicas returned, one is the decommissioned node.
-    assertNull(checkWithRetry(ns, fileSys, file, repl, maintenanceDN));
+    checkWithRetry(ns, fileSys, file, repl, maintenanceDN);

    putNodeInService(0, maintenanceDN);
-    assertNull(checkWithRetry(ns, fileSys, file, repl + 1, null));
+    checkWithRetry(ns, fileSys, file, repl + 1, null);

    cleanupFile(fileSys, file);
    teardown();
@@ -663,15 +665,15 @@ public void testMultipleNodesMaintenance() throws Exception {
        AdminStates.IN_MAINTENANCE);

    // Verify file replication matches maintenance state min replication
-    assertNull(checkWithRetry(ns, fileSys, file, 1, null, nodes[0]));
+    checkWithRetry(ns, fileSys, file, 1, null, nodes[0]);

    // Put the maintenance nodes back in service
    for (DatanodeInfo datanodeInfo : maintenanceDN) {
      putNodeInService(0, datanodeInfo);
    }

    // Verify file replication catching up to the old state
-    assertNull(checkWithRetry(ns, fileSys, file, repl, null));
+    checkWithRetry(ns, fileSys, file, repl, null);

    cleanupFile(fileSys, file);
  }
@@ -720,19 +722,19 @@ private void testChangeReplicationFactor(int oldFactor, int newFactor,

    // Verify that the nodeOutofService remains in blocksMap and
    // # of live replicas For read operation is expected.
-    assertNull(checkWithRetry(ns, fileSys, file, oldFactor - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, oldFactor - 1,
+        nodeOutofService);

    final DFSClient client = getDfsClient(0);
    client.setReplication(file.toString(), (short)newFactor);

    // Verify that the nodeOutofService remains in blocksMap and
    // # of live replicas for read operation.
-    assertNull(checkWithRetry(ns, fileSys, file, expectedLiveReplicas,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, expectedLiveReplicas,
+        nodeOutofService);

    putNodeInService(0, nodeOutofService.getDatanodeUuid());
-    assertNull(checkWithRetry(ns, fileSys, file, newFactor, null));
+    checkWithRetry(ns, fileSys, file, newFactor, null);

    cleanupFile(fileSys, file);
    teardown();
@@ -765,8 +767,8 @@ public void testTakeDeadNodeOutOfMaintenance() throws Exception {
        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
        AdminStates.IN_MAINTENANCE);

-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    final DFSClient client = getDfsClient(0);
    assertEquals("All datanodes must be alive", numDatanodes,
@@ -779,16 +781,16 @@ public void testTakeDeadNodeOutOfMaintenance() throws Exception {
        client.datanodeReport(DatanodeReportType.LIVE).length);

    // Dead maintenance node's blocks should remain in block map.
-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    // When dead maintenance mode is transitioned to out of maintenance mode,
    // its blocks should be removed from block map.
    // This will then trigger replication to restore the live replicas back
    // to replication factor.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService,
-        null));
+    checkWithRetry(ns, fileSys, file, replicas, nodeOutofService,
+        null);

    cleanupFile(fileSys, file);
  }
@@ -821,8 +823,8 @@ public void testWithNNAndDNRestart() throws Exception {
        getFirstBlockFirstReplicaUuid(fileSys, file), Long.MAX_VALUE, null,
        AdminStates.IN_MAINTENANCE);

-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    DFSClient client = getDfsClient(0);
    assertEquals("All datanodes must be alive", numDatanodes,
@@ -836,23 +838,23 @@ public void testWithNNAndDNRestart() throws Exception {
        client.datanodeReport(DatanodeReportType.LIVE).length);

    // Dead maintenance node's blocks should remain in block map.
-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService);

    // restart nn, nn will restore 3 live replicas given it doesn't
    // know the maintenance node has the replica.
    getCluster().restartNameNode(0);
    ns = getCluster().getNamesystem(0);
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+    checkWithRetry(ns, fileSys, file, replicas, null);

    // restart dn, nn has 1 maintenance replica and 3 live replicas.
    getCluster().restartDataNode(dnProp, true);
    getCluster().waitActive();
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, nodeOutofService));
+    checkWithRetry(ns, fileSys, file, replicas, nodeOutofService);

    // Put the node in service, a redundant replica should be removed.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+    checkWithRetry(ns, fileSys, file, replicas, null);

    cleanupFile(fileSys, file);
  }
@@ -878,12 +880,12 @@ public void testWriteAfterMaintenance() throws IOException {
    writeFile(fileSys, file, replicas, 2);

    // Verify nodeOutofService wasn't chosen for write operation.
-    assertNull(checkWithRetry(ns, fileSys, file, replicas - 1,
-        nodeOutofService, null));
+    checkWithRetry(ns, fileSys, file, replicas - 1,
+        nodeOutofService, null);

    // Put the node back to service, live replicas should be restored.
    putNodeInService(0, nodeOutofService.getDatanodeUuid());
-    assertNull(checkWithRetry(ns, fileSys, file, replicas, null));
+    checkWithRetry(ns, fileSys, file, replicas, null);

    cleanupFile(fileSys, file);
  }
@@ -934,12 +936,12 @@ public void testInvalidation() throws IOException {
    client.setReplication(file.toString(), (short) 1);

    // Verify the nodeOutofService remains in blocksMap.
-    assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+    checkWithRetry(ns, fileSys, file, 1, nodeOutofService);

    // Restart NN and verify the nodeOutofService remains in blocksMap.
    getCluster().restartNameNode(0);
    ns = getCluster().getNamesystem(0);
-    assertNull(checkWithRetry(ns, fileSys, file, 1, nodeOutofService));
+    checkWithRetry(ns, fileSys, file, 1, nodeOutofService);

    cleanupFile(fileSys, file);
  }
@@ -1081,30 +1083,32 @@ static String checkFile(FSNamesystem ns, FileSystem fileSys,
    return null;
  }

-  static String checkWithRetry(FSNamesystem ns, FileSystem fileSys,
-      Path name, int repl, DatanodeInfo inMaintenanceNode)
-      throws IOException {
-    return checkWithRetry(ns, fileSys, name, repl, inMaintenanceNode,
+  static void checkWithRetry(FSNamesystem ns, FileSystem fileSys, Path name,
+      int repl, DatanodeInfo inMaintenanceNode) {
+    checkWithRetry(ns, fileSys, name, repl, inMaintenanceNode,
        inMaintenanceNode);
  }

-  static String checkWithRetry(FSNamesystem ns, FileSystem fileSys,
-      Path name, int repl, DatanodeInfo excludedNode,
-      DatanodeInfo underMaintenanceNode) throws IOException {
-    int tries = 0;
-    String output = null;
-    while (tries++ < 200) {
-      try {
-        Thread.sleep(100);
-        output = checkFile(ns, fileSys, name, repl, excludedNode,
-            underMaintenanceNode);
-        if (output == null) {
-          break;
+  static void checkWithRetry(final FSNamesystem ns, final FileSystem fileSys,
+      final Path name, final int repl, final DatanodeInfo excludedNode,
+      final DatanodeInfo underMaintenanceNode) {
+    try {
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+
+        @Override
+        public Boolean get() {
+          String output = null;
+          try {
+            output = checkFile(ns, fileSys, name, repl, excludedNode,
+                underMaintenanceNode);
+          } catch (Exception ignored) {
+          }
+
+          return (output == null);
        }
-      } catch (InterruptedException ie) {
-      }
+      }, 100, 60000);
+    } catch (Exception ignored) {
    }
-    return output;
  }

  static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos(
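For reference, the refactor above replaces the hand-rolled Thread.sleep retry loop with org.apache.hadoop.test.GenericTestUtils.waitFor, which repeatedly evaluates a Supplier<Boolean> until it returns true or the timeout elapses (throwing TimeoutException on expiry). The following is a minimal standalone sketch of that polling pattern, assuming the Guava-based Supplier overload that this patch's imports indicate; the class name and counter-based condition are hypothetical, while the 100 ms poll interval and 60 s timeout mirror the values passed to waitFor in checkWithRetry.

import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.test.GenericTestUtils;

import com.google.common.base.Supplier;

public class WaitForSketch {
  public static void main(String[] args)
      throws TimeoutException, InterruptedException {
    final AtomicInteger polls = new AtomicInteger();

    // Re-evaluate the condition every 100 ms; give up (and throw
    // TimeoutException) if it is still false after 60 seconds.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        // Stand-in condition; the test instead checks checkFile(...) == null.
        return polls.incrementAndGet() >= 3;
      }
    }, 100, 60000);
  }
}

Compared with the old loop, waitFor centralizes the sleep/retry bookkeeping and fails the test with a clear timeout rather than silently returning after 200 iterations.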