Skip to content

Commit

Permalink
brick-mux : fix spurious socket connect failure issue during volume stop
Browse files Browse the repository at this point in the history
Change the logic of using the parent socket file to send the detach
request by picking it up from /proc/<pid>/cmdline where pid is the
process id of the parent brick. During Multiplex, this would avoid
creating a hard link of a socket file.

Credits : [email protected] for helping with the utility function

Fixes: gluster#1468
Signed-off-by: Atin Mukherjee <[email protected]>
  • Loading branch information
Atin Mukherjee committed Jan 8, 2019
1 parent 8ef2e18 commit 1a04cf3
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 19 deletions.
10 changes: 5 additions & 5 deletions glusterd2/brick/glusterfsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type Glusterfsd struct {
// Externally consumable using methods of Glusterfsd interface
binarypath string
args []string
socketfilepath string
Socketfilepath string
pidfilepath string

// For internal use
Expand Down Expand Up @@ -89,8 +89,8 @@ func (b *Glusterfsd) Args() []string {
// SocketFile returns path to the brick socket file used for IPC.
func (b *Glusterfsd) SocketFile() string {

if b.socketfilepath != "" {
return b.socketfilepath
if b.Socketfilepath != "" {
return b.Socketfilepath
}

// First we form a key
Expand All @@ -101,10 +101,10 @@ func (b *Glusterfsd) SocketFile() string {
// Example: /var/run/gluster/<xxhash-hash>.socket
glusterdSockDir := config.GetString("rundir")
hash := strconv.FormatUint(xxhash.Sum64String(key), 16)
b.socketfilepath = path.Join(glusterdSockDir, hash+".socket")
b.Socketfilepath = path.Join(glusterdSockDir, hash+".socket")
// FIXME: The brick can no longer clean this up on clean shut down

return b.socketfilepath
return b.Socketfilepath
}

// PidFile returns path to the pid file of the brick process
Expand Down
17 changes: 13 additions & 4 deletions glusterd2/brickmux/demultiplex.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package brickmux
import (
"fmt"
"os"
"time"

"github.com/gluster/glusterd2/glusterd2/brick"
"github.com/gluster/glusterd2/glusterd2/daemon"
Expand All @@ -27,16 +26,26 @@ func IsLastBrickInProc(b brick.Brickinfo) bool {
// Demultiplex sends a detach request to the brick process which the
// specified brick is multiplexed onto.
func Demultiplex(b brick.Brickinfo) error {

var pidOnFile int
log.WithField("brick", b.String()).Debug("get brick daemon for demultiplex process")
brickDaemon, err := brick.NewGlusterfsd(b)
if err != nil {
return err
}
if pidOnFile, err = daemon.ReadPidFromFile(brickDaemon.PidFile()); err == nil {
log.WithFields(log.Fields{"brick": b.String(),
"pidfile": brickDaemon.PidFile()}).Error("Failed to read the pidfile")
return err

}
brickDaemon.Socketfilepath, err = glusterdGetSockFromBrickPid(pidOnFile)
if err != nil {
log.WithFields(log.Fields{"brick": b.String(),
"pid": pidOnFile}).Error("Failed to get the socket file of the glusterfsd process")
return err
}
log.WithFields(log.Fields{"brick": b.String(),
"socketFile": brickDaemon.SocketFile()}).Debug("starting demultiplex process")

client, err := daemon.GetRPCClient(brickDaemon)
if err != nil {
return err
Expand All @@ -62,7 +71,7 @@ func Demultiplex(b brick.Brickinfo) error {
// There might be some changes on glusterfsd side related to socket
// files used while brick signout,
// make appropriate changes once glusterfsd side is fixed.
time.Sleep(time.Millisecond * 200)
//time.Sleep(time.Millisecond * 200)

log.WithField("brick", b.String()).Debug("deleting brick socket and pid file")
os.Remove(brickDaemon.PidFile())
Expand Down
8 changes: 0 additions & 8 deletions glusterd2/brickmux/multiplex.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package brickmux
import (
"fmt"
"net/rpc"
"os"

"github.com/gluster/glusterd2/glusterd2/brick"
"github.com/gluster/glusterd2/glusterd2/daemon"
Expand Down Expand Up @@ -87,13 +86,6 @@ func Multiplex(b brick.Brickinfo, v *volume.Volinfo, volumes []*volume.Volinfo,
return err
}

// create Unix Domain Socket hardlink
os.Remove(brickProc.SocketFile())
if err := os.Link(targetBrickProc.SocketFile(), brickProc.SocketFile()); err != nil {
undoMultiplex(client, &b)
return err
}

// create duplicate pidfile for the multiplexed brick
ok, pid := daemon.IsRunning(targetBrickProc)
if !ok {
Expand Down
27 changes: 25 additions & 2 deletions glusterd2/brickmux/utils.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package brickmux

import (
"fmt"
config "github.com/spf13/viper"
"io/ioutil"
"os"
"path"

config "github.com/spf13/viper"
"strings"
)

// getBrickVolfilePath returns path correspponding to the volfileID. Since, brick volfiles are now stored on
Expand All @@ -20,3 +22,24 @@ func getBrickVolfilePath(volfileID string) (string, error) {

return volfilePath, nil
}

// glusterdGetSockFromBrickPid returns the socket file from the /proc/
// filesystem for the concerned process running with the same pid
func glusterdGetSockFromBrickPid(pid int) (string, error) {
content, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
if err != nil {
return "", err
}

parts := strings.Split(string(content), "\x00")
prevPart := ""
socketFile := ""
for _, p := range parts {
if prevPart == "-S" {
socketFile = p
break
}
prevPart = p
}
return socketFile, nil
}

0 comments on commit 1a04cf3

Please sign in to comment.