@@ -583,7 +583,7 @@ func (ep *endpoint) deleteDriverInfoFromCluster() error {
583583 return nil
584584}
585585
586- func (ep * endpoint ) addServiceInfoToCluster () error {
586+ func (ep * endpoint ) addServiceInfoToCluster (sb * sandbox ) error {
587587 if ep .isAnonymous () && len (ep .myAliases ) == 0 || ep .Iface ().Address () == nil {
588588 return nil
589589 }
@@ -593,24 +593,49 @@ func (ep *endpoint) addServiceInfoToCluster() error {
593593 return nil
594594 }
595595
596+ sb .Service .Lock ()
597+ defer sb .Service .Unlock ()
598+ logrus .Debugf ("addServiceInfoToCluster START for %s %s" , ep .svcName , ep .ID ())
599+
600+ // Check that the endpoint is still present on the sandbox before adding it to the service discovery.
601+ // This is to handle a race between the EnableService and the sbLeave
602+ // It is possible that the EnableService starts, fetches the list of the endpoints and
603+ // by the time the addServiceInfoToCluster is called the endpoint got removed from the sandbox
604+ // The risk is that the deleteServiceInfoToCluster happens before the addServiceInfoToCluster.
605+ // This check under the Service lock of the sandbox ensure the correct behavior.
606+ // If the addServiceInfoToCluster arrives first may find or not the endpoint and will proceed or exit
607+ // but in any case the deleteServiceInfoToCluster will follow doing the cleanup if needed.
608+ // In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is
609+ // removed from the list, in this situation the delete will bail out not finding any data to cleanup
610+ // and the add will bail out not finding the endpoint on the sandbox.
611+ if e := sb .getEndpoint (ep .ID ()); e == nil {
612+ logrus .Warnf ("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s" , ep .ID ())
613+ return nil
614+ }
615+
596616 c := n .getController ()
597617 agent := c .getAgent ()
598618
619+ name := ep .Name ()
620+ if ep .isAnonymous () {
621+ name = ep .MyAliases ()[0 ]
622+ }
623+
599624 var ingressPorts []* PortConfig
600625 if ep .svcID != "" {
626+ // This is a task part of a service
601627 // Gossip ingress ports only in ingress network.
602628 if n .ingress {
603629 ingressPorts = ep .ingressPorts
604630 }
605-
606- if err := c .addServiceBinding (ep .svcName , ep .svcID , n .ID (), ep .ID (), ep .virtualIP , ingressPorts , ep .svcAliases , ep .Iface ().Address ().IP ); err != nil {
631+ if err := c .addServiceBinding (ep .svcName , ep .svcID , n .ID (), ep .ID (), name , ep .virtualIP , ingressPorts , ep .svcAliases , ep .myAliases , ep .Iface ().Address ().IP , "addServiceInfoToCluster" ); err != nil {
632+ return err
633+ }
634+ } else {
635+ // This is a container simply attached to an attachable network
636+ if err := c .addContainerNameResolution (n .ID (), ep .ID (), name , ep .myAliases , ep .Iface ().Address ().IP , "addServiceInfoToCluster" ); err != nil {
607637 return err
608638 }
609- }
610-
611- name := ep .Name ()
612- if ep .isAnonymous () {
613- name = ep .MyAliases ()[0 ]
614639 }
615640
616641 buf , err := proto .Marshal (& EndpointRecord {
@@ -634,10 +659,12 @@ func (ep *endpoint) addServiceInfoToCluster() error {
634659 }
635660 }
636661
662+ logrus .Debugf ("addServiceInfoToCluster END for %s %s" , ep .svcName , ep .ID ())
663+
637664 return nil
638665}
639666
640- func (ep * endpoint ) deleteServiceInfoFromCluster () error {
667+ func (ep * endpoint ) deleteServiceInfoFromCluster (sb * sandbox , method string ) error {
641668 if ep .isAnonymous () && len (ep .myAliases ) == 0 {
642669 return nil
643670 }
@@ -647,17 +674,33 @@ func (ep *endpoint) deleteServiceInfoFromCluster() error {
647674 return nil
648675 }
649676
677+ sb .Service .Lock ()
678+ defer sb .Service .Unlock ()
679+ logrus .Debugf ("deleteServiceInfoFromCluster from %s START for %s %s" , method , ep .svcName , ep .ID ())
680+
650681 c := n .getController ()
651682 agent := c .getAgent ()
652683
653- if ep .svcID != "" && ep .Iface ().Address () != nil {
654- var ingressPorts []* PortConfig
655- if n .ingress {
656- ingressPorts = ep .ingressPorts
657- }
684+ name := ep .Name ()
685+ if ep .isAnonymous () {
686+ name = ep .MyAliases ()[0 ]
687+ }
658688
659- if err := c .rmServiceBinding (ep .svcName , ep .svcID , n .ID (), ep .ID (), ep .virtualIP , ingressPorts , ep .svcAliases , ep .Iface ().Address ().IP ); err != nil {
660- return err
689+ if ep .Iface ().Address () != nil {
690+ if ep .svcID != "" {
691+ // This is a task part of a service
692+ var ingressPorts []* PortConfig
693+ if n .ingress {
694+ ingressPorts = ep .ingressPorts
695+ }
696+ if err := c .rmServiceBinding (ep .svcName , ep .svcID , n .ID (), ep .ID (), name , ep .virtualIP , ingressPorts , ep .svcAliases , ep .myAliases , ep .Iface ().Address ().IP , "deleteServiceInfoFromCluster" ); err != nil {
697+ return err
698+ }
699+ } else {
700+ // This is a container simply attached to an attachable network
701+ if err := c .delContainerNameResolution (n .ID (), ep .ID (), name , ep .myAliases , ep .Iface ().Address ().IP , "deleteServiceInfoFromCluster" ); err != nil {
702+ return err
703+ }
661704 }
662705 }
663706
@@ -667,6 +710,8 @@ func (ep *endpoint) deleteServiceInfoFromCluster() error {
667710 }
668711 }
669712
713+ logrus .Debugf ("deleteServiceInfoFromCluster from %s END for %s %s" , method , ep .svcName , ep .ID ())
714+
670715 return nil
671716}
672717
@@ -814,58 +859,56 @@ func (c *controller) handleEpTableEvent(ev events.Event) {
814859 value = event .Value
815860 case networkdb.UpdateEvent :
816861 logrus .Errorf ("Unexpected update service table event = %#v" , event )
817- }
818-
819- nw , err := c .NetworkByID (nid )
820- if err != nil {
821- logrus .Errorf ("Could not find network %s while handling service table event: %v" , nid , err )
822862 return
823863 }
824- n := nw .(* network )
825864
826- err = proto .Unmarshal (value , & epRec )
865+ err : = proto .Unmarshal (value , & epRec )
827866 if err != nil {
828867 logrus .Errorf ("Failed to unmarshal service table value: %v" , err )
829868 return
830869 }
831870
832- name := epRec .Name
871+ containerName := epRec .Name
833872 svcName := epRec .ServiceName
834873 svcID := epRec .ServiceID
835874 vip := net .ParseIP (epRec .VirtualIP )
836875 ip := net .ParseIP (epRec .EndpointIP )
837876 ingressPorts := epRec .IngressPorts
838- aliases := epRec .Aliases
839- taskaliases := epRec .TaskAliases
877+ serviceAliases := epRec .Aliases
878+ taskAliases := epRec .TaskAliases
840879
841- if name == "" || ip == nil {
880+ if containerName == "" || ip == nil {
842881 logrus .Errorf ("Invalid endpoint name/ip received while handling service table event %s" , value )
843882 return
844883 }
845884
846885 if isAdd {
886+ logrus .Debugf ("handleEpTableEvent ADD %s R:%v" , isAdd , eid , epRec )
847887 if svcID != "" {
848- if err := c .addServiceBinding (svcName , svcID , nid , eid , vip , ingressPorts , aliases , ip ); err != nil {
849- logrus .Errorf ("Failed adding service binding for value %s: %v" , value , err )
888+ // This is a remote task part of a service
889+ if err := c .addServiceBinding (svcName , svcID , nid , eid , containerName , vip , ingressPorts , serviceAliases , taskAliases , ip , "handleEpTableEvent" ); err != nil {
890+ logrus .Errorf ("failed adding service binding for %s epRec:%v err:%s" , eid , epRec , err )
850891 return
851892 }
852- }
853-
854- n . addSvcRecords ( name , ip , nil , true )
855- for _ , alias := range taskaliases {
856- n . addSvcRecords ( alias , ip , nil , true )
893+ } else {
894+ // This is a remote container simply attached to an attachable network
895+ if err := c . addContainerNameResolution ( nid , eid , containerName , taskAliases , ip , "handleEpTableEvent" ); err != nil {
896+ logrus . Errorf ( "failed adding service binding for %s epRec:%v err:%s" , eid , epRec , err )
897+ }
857898 }
858899 } else {
900+ logrus .Debugf ("handleEpTableEvent DEL %s R:%v" , isAdd , eid , epRec )
859901 if svcID != "" {
860- if err := c .rmServiceBinding (svcName , svcID , nid , eid , vip , ingressPorts , aliases , ip ); err != nil {
861- logrus .Errorf ("Failed adding service binding for value %s: %v" , value , err )
902+ // This is a remote task part of a service
903+ if err := c .rmServiceBinding (svcName , svcID , nid , eid , containerName , vip , ingressPorts , serviceAliases , taskAliases , ip , "handleEpTableEvent" ); err != nil {
904+ logrus .Errorf ("failed removing service binding for %s epRec:%v err:%s" , eid , epRec , err )
862905 return
863906 }
864- }
865-
866- n . deleteSvcRecords ( name , ip , nil , true )
867- for _ , alias := range taskaliases {
868- n . deleteSvcRecords ( alias , ip , nil , true )
907+ } else {
908+ // This is a remote container simply attached to an attachable network
909+ if err := c . delContainerNameResolution ( nid , eid , containerName , taskAliases , ip , "handleEpTableEvent" ); err != nil {
910+ logrus . Errorf ( "failed adding service binding for %s epRec:%v err:%s" , eid , epRec , err )
911+ }
869912 }
870913 }
871914}
0 commit comments