-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Etcd membership module #17736
Add Etcd membership module #17736
Changes from all commits
a6829a0
9647bd6
8779d7f
38e2f53
f158b51
0565af5
9b5c2ad
8bc5f54
696ff87
cf48792
3bd1545
0702239
4db1c5d
d0b129c
a5462e7
07e63ca
6f91e3a
f66aea0
649f1af
15eec12
5a98697
9bb2752
74c2fe7
aeea556
3e742a3
cd5f6fb
efe5eb7
6461040
c2a5ffb
7212b41
f15a6bc
29c3ca5
348f0da
c03708a
d24bc0d
918c078
9e3b125
9fe70f0
11e7ca7
ab2d2d2
925bb78
6df3eec
3bc5003
1e9d688
80a5879
0816af0
b5f9614
4a32caf
4a5b49e
fe0bf2f
a26bf2e
ef18bc5
3bc06b2
1078c16
c87bce8
7ffe87f
cdc733b
66be616
3224225
7da6f45
e0327ad
28c04c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# This is the configuration file to start a etcd instance | ||
# e.g. /usr/local/bin/etcd --config-file /etc/etcd/etcd.conf | ||
# *******README****** | ||
# To make etcd a linux service: | ||
# After installation of etcd, make sure etcd and etcdctl | ||
# are available in /usr/local/bin | ||
# To make etcd a linux service: | ||
# Copy alluxio/conf/etcd/etcd.service.template to /etc/systemd/system/etcd.service | ||
# Copy alluxio/conf/etcd/etcd.conf.template to /etc/etcd/etcd.conf | ||
# For each etcd instance, change the config params in etcd.conf | ||
# accordingly. | ||
# And do: | ||
# #systemctl daemon-reload | ||
# Then etcd could be registered as a linux service | ||
# e.g. | ||
# Check status | ||
# #service etcd status | ||
# Start etcd | ||
# #service etcd start | ||
# Stop etcd | ||
# #service etcd stop | ||
|
||
|
||
# Human-readable name for this member. | ||
#name: 'etcd1' | ||
|
||
# Path to the data directory. | ||
data-dir: /etcd-data-dir/data | ||
|
||
# Path to the dedicated wal directory. | ||
wal-dir: /etcd-data-dir/wal | ||
|
||
|
||
# List of comma separated URLs to listen on for peer traffic. | ||
#give ip/hostname of this etcd instance | ||
listen-peer-urls: http://<hostname_or_ip>:2380 | ||
|
||
# List of comma separated URLs to listen on for client traffic. | ||
#give ip/hostname of this etcd instance | ||
listen-client-urls: http://<hostname_or_ip>:2379,http://127.0.0.1:2379 | ||
|
||
# List of this member's peer URLs to advertise to the rest of the cluster. | ||
# The URLs needed to be a comma-separated list. | ||
#give ip/hostname of this etcd instance for remote etcd members communication | ||
initial-advertise-peer-urls: http://<hostname_or_ip>:2380 | ||
|
||
# List of this member's client URLs to advertise to the public. | ||
# The URLs needed to be a comma-separated list. | ||
#give ip/hostname of this etcd instance for etcd client communication | ||
advertise-client-urls: http://<hostname_or_ip>:2379 | ||
|
||
# Initial cluster configuration for bootstrapping. | ||
#give all ip/hostnames of members of initial etcd cluster | ||
initial-cluster: etcd0=http://<hostname_or_ip>:2380,etcd1=http://<hostname_or_ip>:2380,etcd2=http://<hostname_or_ip>:2380 | ||
|
||
# Initial cluster token for the etcd cluster during bootstrap. | ||
#initial-cluster-token: 'etcd-cluster-1' | ||
|
||
# Initial cluster state ('new' or 'existing'). | ||
initial-cluster-state: 'new' | ||
|
||
# Enable debug-level logging for etcd. | ||
#log-level: debug | ||
|
||
#logger: zap | ||
|
||
# Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd. | ||
# log-outputs: [stderr] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[Unit] | ||
Description=Etcd Service | ||
|
||
[Service] | ||
ExecStart=/usr/local/bin/etcd --config-file /etc/etcd/etcd.conf | ||
KillSignal=SIGTERM | ||
StandardOutput=append:/var/log/etcd.log | ||
StandardError=append:/var/log/etcd.err | ||
|
||
[Install] | ||
WantedBy=default.target |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,8 @@ | |
import alluxio.grpc.GrpcServerAddress; | ||
import alluxio.master.MasterClientContext; | ||
import alluxio.master.MasterInquireClient; | ||
import alluxio.membership.MembershipManager; | ||
import alluxio.membership.NoOpMembershipManager; | ||
import alluxio.metrics.MetricsSystem; | ||
import alluxio.network.netty.NettyChannelPool; | ||
import alluxio.network.netty.NettyClient; | ||
|
@@ -154,6 +156,8 @@ public class FileSystemContext implements Closeable { | |
*/ | ||
private volatile ConcurrentHashMap<ClientPoolKey, BlockWorkerClientPool> | ||
mBlockWorkerClientPoolMap; | ||
@Nullable | ||
private MembershipManager mMembershipManager; | ||
apc999 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* Indicates whether the {@link #mLocalWorker} field has been lazily initialized yet. | ||
|
@@ -443,6 +447,11 @@ protected synchronized void initContext(ClientContext ctx, | |
mBlockMasterClientPool = new BlockMasterClientPool(mMasterClientContext); | ||
mBlockWorkerClientPoolMap = new ConcurrentHashMap<>(); | ||
mUriValidationEnabled = ctx.getUriValidationEnabled(); | ||
try { | ||
mMembershipManager = MembershipManager.Factory.create(getClusterConf()); | ||
} catch (IOException ex) { | ||
LOG.error("Failed to set membership manager.", ex); | ||
apc999 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
/** | ||
|
@@ -490,6 +499,12 @@ private synchronized void closeContext() throws IOException { | |
if (mMetricsEnabled) { | ||
MetricsHeartbeatContext.removeHeartbeat(getClientContext()); | ||
} | ||
LOG.debug("Closing membership manager."); | ||
try (AutoCloseable ignoredCloser = mMembershipManager) { | ||
// do nothing as we are closing | ||
} catch (Exception e) { | ||
throw new IOException(e); | ||
} | ||
Comment on lines
+503
to
+507
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this necessary since there is nothing inside the try-catch block? |
||
} else { | ||
LOG.warn("Attempted to close FileSystemContext which has already been closed or not " | ||
+ "initialized."); | ||
|
@@ -864,6 +879,17 @@ public List<BlockWorkerInfo> getCachedWorkers() throws IOException { | |
* @return the info of all block workers | ||
*/ | ||
protected List<BlockWorkerInfo> getAllWorkers() throws IOException { | ||
// TODO(lucy) once ConfigHashSync reinit is gotten rid of, will remove the blockReinit | ||
// guard altogether | ||
try (ReinitBlockerResource r = blockReinit()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I have reasons to believe this
Could you add a TODO on this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will address this as part of subsequent changes. will add TODO to notify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [ADDRESS IN NEXT PR] |
||
// Use membership mgr | ||
if (mMembershipManager != null && !(mMembershipManager instanceof NoOpMembershipManager)) { | ||
return mMembershipManager.getAllMembers().stream() | ||
.map(w -> new BlockWorkerInfo(w.getAddress(), w.getCapacityBytes(), w.getUsedBytes())) | ||
.collect(toList()); | ||
} | ||
} | ||
// Fall back to old way | ||
try (CloseableResource<BlockMasterClient> masterClientResource = | ||
acquireBlockMasterClientResource()) { | ||
return masterClientResource.get().getWorkerInfoList().stream() | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -48,6 +48,7 @@ | |||||
import alluxio.master.metastore.MetastoreType; | ||||||
import alluxio.master.metastore.rocks.DataBlockIndexType; | ||||||
import alluxio.master.metastore.rocks.IndexType; | ||||||
import alluxio.membership.MembershipType; | ||||||
import alluxio.network.ChannelType; | ||||||
import alluxio.network.netty.FileTransferType; | ||||||
import alluxio.security.authentication.AuthType; | ||||||
|
@@ -5505,6 +5506,25 @@ public String toString() { | |||||
.setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) | ||||||
.setScope(Scope.WORKER) | ||||||
.build(); | ||||||
public static final PropertyKey WORKER_MEMBERSHIP_MANAGER_TYPE = | ||||||
enumBuilder(Name.WORKER_MEMBERSHIP_MANAGER_TYPE, MembershipType.class) | ||||||
.setDefaultValue(MembershipType.NOOP.name()) | ||||||
.setDescription("Type of membership manager used for workers." | ||||||
+ "Choose STATIC for pre-configured members." | ||||||
+ "Choose ETCD for using etcd for membership management" | ||||||
+ "Default is NOOP which does not enable membership manager at all") | ||||||
.setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) | ||||||
.setScope(Scope.ALL) | ||||||
.build(); | ||||||
public static final PropertyKey WORKER_STATIC_MEMBERSHIP_MANAGER_CONFIG_FILE = | ||||||
stringBuilder(Name.WORKER_STATIC_MEMBERSHIP_MANAGER_CONFIG_FILE) | ||||||
.setDefaultValue(format("${%s}/workers", Name.CONF_DIR)) | ||||||
.setDescription("Absolute path of the config file for list" | ||||||
+ "of worker hostnames/IPs for the cluster. " | ||||||
+ Name.WORKER_MEMBERSHIP_MANAGER_TYPE + " needs to be set" | ||||||
+ " to STATIC first.") | ||||||
.setScope(Scope.ALL) | ||||||
.build(); | ||||||
|
||||||
// | ||||||
// Proxy related properties | ||||||
|
@@ -7626,6 +7646,19 @@ public String toString() { | |||||
stringBuilder(Name.ZOOKEEPER_JOB_LEADER_PATH) | ||||||
.setDefaultValue("/alluxio/job_leader").build(); | ||||||
|
||||||
// | ||||||
// Membership related properties | ||||||
// | ||||||
public static final PropertyKey ALLUXIO_CLUSTER_NAME = | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No actually this IS the name of the alluxio cluster for etcd to manage, purpose is for etcd if it is managing multiple alluxio clusters There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My reason is, this property key is only used when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [ADDRESS IN NEXT PR] |
||||||
stringBuilder(Name.ALLUXIO_CLUSTER_NAME) | ||||||
.setDefaultValue("DefaultAlluxioCluster").build(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: use hyphen instead of camel case? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why aren't all the builder fields being set? specifically
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh i think i missed it, let me add it, is it causing any problems now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not yet but... 🤷 |
||||||
public static final PropertyKey ETCD_ENDPOINTS = | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider in future it might be used for scope outside of worker membership, hence the name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer to stay with a smaller scope (by prefixing in the property key name). And later if you have a larger scope and introduce There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please address this also in the next PR @lucyge2022 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [ADDRESS IN NEXT PR] |
||||||
listBuilder(Name.ETCD_ENDPOINTS) | ||||||
.setDescription("A list of comma-separated http://host:port addresses of " | ||||||
+ "etcd cluster (e.g. http://localhost:2379,http://etcd1:2379)") | ||||||
.setScope(Scope.ALL) | ||||||
.build(); | ||||||
|
||||||
// | ||||||
// JVM Monitor related properties | ||||||
// | ||||||
|
@@ -8993,6 +9026,10 @@ public static final class Name { | |||||
public static final String WORKER_UFS_INSTREAM_CACHE_MAX_SIZE = | ||||||
"alluxio.worker.ufs.instream.cache.max.size"; | ||||||
public static final String WORKER_WHITELIST = "alluxio.worker.whitelist"; | ||||||
public static final String WORKER_MEMBERSHIP_MANAGER_TYPE = | ||||||
"alluxio.worker.membership.manager.type"; | ||||||
public static final String WORKER_STATIC_MEMBERSHIP_MANAGER_CONFIG_FILE = | ||||||
"alluxio.worker.static.membership.manager.config.file"; | ||||||
|
||||||
// | ||||||
// Proxy related properties | ||||||
|
@@ -9480,6 +9517,10 @@ public static final class Name { | |||||
public static final String ZOOKEEPER_JOB_ELECTION_PATH = "alluxio.zookeeper.job.election.path"; | ||||||
public static final String ZOOKEEPER_JOB_LEADER_PATH = "alluxio.zookeeper.job.leader.path"; | ||||||
|
||||||
// Membership related properties | ||||||
public static final String ALLUXIO_CLUSTER_NAME = "alluxio.cluster.name"; | ||||||
public static final String ETCD_ENDPOINTS = "alluxio.etcd.endpoints"; | ||||||
|
||||||
// | ||||||
// JVM Monitor related properties | ||||||
// | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do these two default locations work on Mac?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no this file is for setting up a linux service of etcd
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mac has
/var/log
but that belongs to root. I figure it's worth mentioning in a short comment, that one can use this path on Mac but may need sudo. I figure the template file is most useful when one want to test and experience a feature, and that test may very likely happen on Mac instead of linux.