@@ -880,6 +880,88 @@ static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
880880 }
881881}
882882
883+ static void __dlm_master_lookup (struct dlm_ls * ls , struct dlm_rsb * r , int our_nodeid ,
884+ int from_nodeid , bool toss_list , unsigned int flags ,
885+ int * r_nodeid , int * result )
886+ {
887+ int fix_master = (flags & DLM_LU_RECOVER_MASTER );
888+ int from_master = (flags & DLM_LU_RECOVER_DIR );
889+
890+ if (r -> res_dir_nodeid != our_nodeid ) {
891+ /* should not happen, but may as well fix it and carry on */
892+ log_error (ls , "%s res_dir %d our %d %s" , __func__ ,
893+ r -> res_dir_nodeid , our_nodeid , r -> res_name );
894+ r -> res_dir_nodeid = our_nodeid ;
895+ }
896+
897+ if (fix_master && dlm_is_removed (ls , r -> res_master_nodeid )) {
898+ /* Recovery uses this function to set a new master when
899+ * the previous master failed. Setting NEW_MASTER will
900+ * force dlm_recover_masters to call recover_master on this
901+ * rsb even though the res_nodeid is no longer removed.
902+ */
903+
904+ r -> res_master_nodeid = from_nodeid ;
905+ r -> res_nodeid = from_nodeid ;
906+ rsb_set_flag (r , RSB_NEW_MASTER );
907+
908+ if (toss_list ) {
909+ /* I don't think we should ever find it on toss list. */
910+ log_error (ls , "%s fix_master on toss" , __func__ );
911+ dlm_dump_rsb (r );
912+ }
913+ }
914+
915+ if (from_master && (r -> res_master_nodeid != from_nodeid )) {
916+ /* this will happen if from_nodeid became master during
917+ * a previous recovery cycle, and we aborted the previous
918+ * cycle before recovering this master value
919+ */
920+
921+ log_limit (ls , "%s from_master %d master_nodeid %d res_nodeid %d first %x %s" ,
922+ __func__ , from_nodeid , r -> res_master_nodeid ,
923+ r -> res_nodeid , r -> res_first_lkid , r -> res_name );
924+
925+ if (r -> res_master_nodeid == our_nodeid ) {
926+ log_error (ls , "from_master %d our_master" , from_nodeid );
927+ dlm_dump_rsb (r );
928+ goto ret_assign ;
929+ }
930+
931+ r -> res_master_nodeid = from_nodeid ;
932+ r -> res_nodeid = from_nodeid ;
933+ rsb_set_flag (r , RSB_NEW_MASTER );
934+ }
935+
936+ if (!r -> res_master_nodeid ) {
937+ /* this will happen if recovery happens while we're looking
938+ * up the master for this rsb
939+ */
940+
941+ log_debug (ls , "%s master 0 to %d first %x %s" , __func__ ,
942+ from_nodeid , r -> res_first_lkid , r -> res_name );
943+ r -> res_master_nodeid = from_nodeid ;
944+ r -> res_nodeid = from_nodeid ;
945+ }
946+
947+ if (!from_master && !fix_master &&
948+ (r -> res_master_nodeid == from_nodeid )) {
949+ /* this can happen when the master sends remove, the dir node
950+ * finds the rsb on the keep list and ignores the remove,
951+ * and the former master sends a lookup
952+ */
953+
954+ log_limit (ls , "%s from master %d flags %x first %x %s" ,
955+ __func__ , from_nodeid , flags , r -> res_first_lkid ,
956+ r -> res_name );
957+ }
958+
959+ ret_assign :
960+ * r_nodeid = r -> res_master_nodeid ;
961+ if (result )
962+ * result = DLM_LU_MATCH ;
963+ }
964+
883965/*
884966 * We're the dir node for this res and another node wants to know the
885967 * master nodeid. During normal operation (non recovery) this is only
@@ -914,10 +996,8 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
914996{
915997 struct dlm_rsb * r = NULL ;
916998 uint32_t hash , b ;
917- int from_master = (flags & DLM_LU_RECOVER_DIR );
918- int fix_master = (flags & DLM_LU_RECOVER_MASTER );
919999 int our_nodeid = dlm_our_nodeid ();
920- int dir_nodeid , error , toss_list = 0 ;
1000+ int dir_nodeid , error ;
9211001
9221002 if (len > DLM_RESNAME_MAXLEN )
9231003 return - EINVAL ;
@@ -949,103 +1029,38 @@ int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
9491029 error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].keep , name , len , & r );
9501030 if (!error ) {
9511031 /* because the rsb is active, we need to lock_rsb before
952- checking/changing re_master_nodeid */
1032+ * checking/changing re_master_nodeid
1033+ */
9531034
9541035 hold_rsb (r );
9551036 spin_unlock (& ls -> ls_rsbtbl [b ].lock );
9561037 lock_rsb (r );
957- } else {
958- error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].toss , name , len , & r );
959- if (error )
960- goto not_found ;
961-
962- /* because the rsb is inactive (on toss list), it's not refcounted
963- * and lock_rsb is not used, but is protected by the rsbtbl lock
964- */
965-
966- toss_list = 1 ;
967- }
968-
969- if (r -> res_dir_nodeid != our_nodeid ) {
970- /* should not happen, but may as well fix it and carry on */
971- log_error (ls , "dlm_master_lookup res_dir %d our %d %s" ,
972- r -> res_dir_nodeid , our_nodeid , r -> res_name );
973- r -> res_dir_nodeid = our_nodeid ;
974- }
975-
976- if (fix_master && dlm_is_removed (ls , r -> res_master_nodeid )) {
977- /* Recovery uses this function to set a new master when
978- the previous master failed. Setting NEW_MASTER will
979- force dlm_recover_masters to call recover_master on this
980- rsb even though the res_nodeid is no longer removed. */
981-
982- r -> res_master_nodeid = from_nodeid ;
983- r -> res_nodeid = from_nodeid ;
984- rsb_set_flag (r , RSB_NEW_MASTER );
985-
986- if (toss_list ) {
987- /* I don't think we should ever find it on toss list. */
988- log_error (ls , "dlm_master_lookup fix_master on toss" );
989- dlm_dump_rsb (r );
990- }
991- }
9921038
993- if (from_master && (r -> res_master_nodeid != from_nodeid )) {
994- /* this will happen if from_nodeid became master during
995- a previous recovery cycle, and we aborted the previous
996- cycle before recovering this master value */
1039+ __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , false,
1040+ flags , r_nodeid , result );
9971041
998- log_limit (ls , "dlm_master_lookup from_master %d "
999- "master_nodeid %d res_nodeid %d first %x %s" ,
1000- from_nodeid , r -> res_master_nodeid , r -> res_nodeid ,
1001- r -> res_first_lkid , r -> res_name );
1002-
1003- if (r -> res_master_nodeid == our_nodeid ) {
1004- log_error (ls , "from_master %d our_master" , from_nodeid );
1005- dlm_dump_rsb (r );
1006- goto out_found ;
1007- }
1042+ /* the rsb was active */
1043+ unlock_rsb (r );
1044+ put_rsb (r );
10081045
1009- r -> res_master_nodeid = from_nodeid ;
1010- r -> res_nodeid = from_nodeid ;
1011- rsb_set_flag (r , RSB_NEW_MASTER );
1046+ return 0 ;
10121047 }
10131048
1014- if (!r -> res_master_nodeid ) {
1015- /* this will happen if recovery happens while we're looking
1016- up the master for this rsb */
1017-
1018- log_debug (ls , "dlm_master_lookup master 0 to %d first %x %s" ,
1019- from_nodeid , r -> res_first_lkid , r -> res_name );
1020- r -> res_master_nodeid = from_nodeid ;
1021- r -> res_nodeid = from_nodeid ;
1022- }
1049+ error = dlm_search_rsb_tree (& ls -> ls_rsbtbl [b ].toss , name , len , & r );
1050+ if (error )
1051+ goto not_found ;
10231052
1024- if (!from_master && !fix_master &&
1025- (r -> res_master_nodeid == from_nodeid )) {
1026- /* this can happen when the master sends remove, the dir node
1027- finds the rsb on the keep list and ignores the remove,
1028- and the former master sends a lookup */
1053+ /* because the rsb is inactive (on toss list), it's not refcounted
1054+ * and lock_rsb is not used, but is protected by the rsbtbl lock
1055+ */
10291056
1030- log_limit (ls , "dlm_master_lookup from master %d flags %x "
1031- "first %x %s" , from_nodeid , flags ,
1032- r -> res_first_lkid , r -> res_name );
1033- }
1057+ __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , true, flags ,
1058+ r_nodeid , result );
10341059
1035- out_found :
1036- * r_nodeid = r -> res_master_nodeid ;
1037- if (result )
1038- * result = DLM_LU_MATCH ;
1060+ r -> res_toss_time = jiffies ;
1061+ /* the rsb was inactive (on toss list) */
1062+ spin_unlock (& ls -> ls_rsbtbl [b ].lock );
10391063
1040- if (toss_list ) {
1041- r -> res_toss_time = jiffies ;
1042- /* the rsb was inactive (on toss list) */
1043- spin_unlock (& ls -> ls_rsbtbl [b ].lock );
1044- } else {
1045- /* the rsb was active */
1046- unlock_rsb (r );
1047- put_rsb (r );
1048- }
10491064 return 0 ;
10501065
10511066 not_found :
0 commit comments