# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/hadoop/zookeeper/data
dataLogDir=/usr/local/hadoop/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=hadoop01:2888:3888
server.2=hadoop02:2888:3888
server.3=hadoop03:2888:3888
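For the server.N entries above to take effect, each ZooKeeper node also needs a myid file under dataDir containing its own server id; a minimal sketch, assuming the dataDir configured above:
echo 1 > /usr/local/hadoop/zookeeper/data/myid    # on hadoop01
echo 2 > /usr/local/hadoop/zookeeper/data/myid    # on hadoop02
echo 3 > /usr/local/hadoop/zookeeper/data/myid    # on hadoop03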
vim /usr/local/hadoop/hadoop/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/cluster/hadoop/data/tmp</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <description>Size of read/write buffer used in SequenceFiles</description>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
    <description>DFSZKFailoverController</description>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
vim /usr/local/hadoop/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/cluster/hadoop/data/nn</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/cluster/hadoop/data/dn</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/cluster/hadoop/data/jn</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>hadoop01,hadoop02</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.hadoop01</name>
    <value>hadoop01:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.hadoop01</name>
    <value>hadoop01:9870</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.hadoop02</name>
    <value>hadoop02:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.hadoop02</name>
    <value>hadoop02:9870</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled.ns1</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
    <description>HDFS blocksize of 128MB for large file-systems</description>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
    <description>More NameNode server threads to handle RPCs from large number of DataNodes.</description>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoop01:8485;hadoop02:8485;hadoop03:8485/ns1</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>
</configuration>
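If you prefer to create the local storage directories referenced above ahead of time on every node, a minimal sketch using the paths from this file:
mkdir -p /home/cluster/hadoop/data/nn /home/cluster/hadoop/data/dn /home/cluster/hadoop/data/jn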
vim /usr/local/hadoop/hadoop/etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>Execution framework set to Hadoop YARN.</description>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>4096</value>
    <description>Larger resource limit for maps.</description>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx4096M</value>
    <description>Larger heap-size for child jvms of maps.</description>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
    <description>Larger resource limit for reduces.</description>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx4096M</value>
    <description>Larger heap-size for child jvms of reduces.</description>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>2040</value>
    <description>Higher memory-limit while sorting data for efficiency.</description>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>400</value>
    <description>More streams merged at once while sorting files.</description>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>200</value>
    <description>Higher number of parallel copies run by reduces to fetch outputs from very large number of maps.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop01:10020</value>
    <description>MapReduce JobHistory Server host:port. Default port is 10020.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop01:19888</value>
    <description>MapReduce JobHistory Server Web UI host:port. Default port is 19888.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/tmp/mr-history/tmp</value>
    <description>Directory where history files are written by MapReduce jobs.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/tmp/mr-history/done</value>
    <description>Directory where history files are managed by the MR JobHistory Server.</description>
  </property>
</configuration>
vim /usr/local/hadoop/hadoop/etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-rm-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hadoop01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hadoop02</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk.state-store.address</name>
    <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>hadoop01:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>hadoop02:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>hadoop01:8034</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>hadoop01:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>hadoop02:8034</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>hadoop02:8088</value>
  </property>
  <property>
    <name>yarn.acl.enable</name>
    <value>true</value>
    <description>Enable ACLs? Defaults to false.</description>
  </property>
  <property>
    <name>yarn.admin.acl</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>false</value>
    <description>Configuration to enable or disable log aggregation</description>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop01</value>
    <description>Single hostname that can be set in place of setting all yarn.resourcemanager*address resources. Results in default ports for ResourceManager components.</description>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
    <description>Minimum resource a container can request when scheduling (e.g. Spark); default is 1024 MB</description>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>28672</value>
    <description>Maximum resource a container can request when scheduling (e.g. Spark); default is 8192 MB</description>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>28672</value>
    <description>Maximum memory the NodeManager can allocate; default is 8192 MB</description>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>28672</value>
    <description>Maximum memory the ApplicationMaster can request; default is 1536 MB</description>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/home/cluster/yarn/log/1,/home/cluster/yarn/log/2,/home/cluster/yarn/log/3</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Shuffle service that needs to be set for Map Reduce applications.</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>-1</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-check-interval-seconds</name>
    <value>-1</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>hdfs://ns1/tmp/hadoop-yarn/staging</value>
    <description>The staging dir used while submitting jobs.</description>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/hadoop/etc/hadoop:/usr/local/hadoop/hadoop/share/hadoop/common/lib/*:/usr/local/hadoop/hadoop/share/hadoop/common/*:/usr/local/hadoop/hadoop/share/hadoop/hdfs:/usr/local/hadoop/hadoop/share/hadoop/hdfs/lib/*:/usr/local/hadoop/hadoop/share/hadoop/hdfs/*:/usr/local/hadoop/hadoop/share/hadoop/mapreduce/*:/usr/local/hadoop/hadoop/share/hadoop/yarn:/usr/local/hadoop/hadoop/share/hadoop/yarn/lib/*:/usr/local/hadoop/hadoop/share/hadoop/yarn/*</value>
    <description>All paths reported by running hadoop classpath on Linux</description>
  </property>
</configuration>
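As the yarn.application.classpath description notes, the value does not have to be typed by hand; the following command prints the classpath that goes into that property on the machine where Hadoop is installed:
hadoop classpath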
7. Initialize and Start the Cluster
1. Start ZooKeeper (hadoop01, hadoop02, hadoop03)
Hadoop's HA mechanism depends on ZooKeeper, so start the ZooKeeper cluster first.
zkServer.sh start
zkServer.sh status
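Besides zkServer.sh status, a quick way to confirm that the ZooKeeper server process is up on each node (QuorumPeerMain is ZooKeeper's server class):
jps | grep QuorumPeerMain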
2. Initialize the HA metadata in ZooKeeper (hadoop01)
hdfs zkfc -formatZK
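To verify that the HA znodes were created, you can inspect ZooKeeper directly; a sketch, assuming the default parent znode /hadoop-ha:
zkCli.sh -server hadoop01:2181
ls /hadoop-ha    # should list the nameservice, ns1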
3. Start the ZKFC (hadoop01, hadoop02)
hdfs --daemon start zkfc
4. Start the JournalNodes (hadoop01, hadoop02, hadoop03)
The JournalNodes must be running before the NameNode is formatted, otherwise the format fails (the shared edits directory on the JournalNodes is initialized as part of the NameNode format).
Three JournalNodes are configured here: hadoop01, hadoop02, and hadoop03.
hdfs --daemon start journalnode
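Before moving on to the format step, confirm that the JournalNode process is actually running on all three nodes:
jps | grep JournalNode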
5. Format the NameNode (hadoop01)
Run this on the first NameNode:
hdfs namenode -format
6. Start the NameNode (hadoop01)
hdfs --daemon start namenode
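Once the NameNode is up, its web UI (port 9870, as configured in hdfs-site.xml) should respond; a quick check, where the expected status code is a success or redirect:
curl -s -o /dev/null -w "%{http_code}\n" http://hadoop01:9870/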
7. Bootstrap the standby NameNode (hadoop02)
Wait for HDFS to finish initializing (about 20 seconds), then run the following on the other NameNode:
hdfs namenode -bootstrapStandby
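After the bootstrap succeeds, start the NameNode on hadoop02 as well and check which node is active; a sketch using the NameNode IDs defined in hdfs-site.xml:
hdfs --daemon start namenode              # on hadoop02
hdfs haadmin -getServiceState hadoop01    # reports active or standby
hdfs haadmin -getServiceState hadoop02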
Common errors: (1) If the format fails or either of the following errors appears, delete the corresponding directories on that node and run the format again (a cleanup sketch follows the error messages):
Directory is in an inconsistent state: Can't format the storage directory because the current directory is not empty.
INFO ipc.Client: Retrying connect to server: hadoop01/172.18.0.2:8020. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
FATAL ha.BootstrapStandby: Unable to fetch namespace information from any remote NN. Possible NameNodes: [RemoteNameNodeInfo [nnId=hadoop01, ipcAddress=hadoop01/172.18.0.2:8020,
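A cleanup sketch for the first error, on the affected node, assuming the storage directories configured in hdfs-site.xml (wipe only the directory the message complains about, then re-run the step that failed, namenode -format or -bootstrapStandby):
rm -rf /home/cluster/hadoop/data/nn/*
rm -rf /home/cluster/hadoop/data/jn/*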
Error saving credentials: error storing credentials - err: exit status 1, out: `Error spawning command line “dbus-launch --autolaunch= --binary-syntax --close-stderr”: Child process exited with code 1`
Solution:
sudo apt install gnupg2 pass
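If the error persists after installing the packages and Docker is configured to use the pass credential helper, the pass store may also need to be initialized with a GPG key; a sketch, where the key ID is whatever gpg prints for the key you generate:
gpg2 --gen-key                 # generate a key pair and note its key ID
pass init <your-gpg-key-id>    # initialize the password store with that key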
3. Tag the image with a repository version number
Syntax 1 for tagging a repository image:
docker tag [ImageId] registry.cn-shanghai.aliyuncs.com/[namespace]/[repository]:[image version]
docker tag 0414bdf69fa4 registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v1.1
Syntax 2 for tagging a repository image:
docker tag [Image:TAG] registry.cn-shanghai.aliyuncs.com/[namespace]/[repository]:[image version]
docker tag hadoop01:v1 registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v1.1
docker tag hadoop02:v1 registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v2.1
docker tag hadoop03:v1 registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v3.1
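After tagging, the images can be pushed to the Aliyun registry; a minimal sketch, assuming docker login against registry.cn-hangzhou.aliyuncs.com has already succeeded:
docker push registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v1.1
docker push registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v2.1
docker push registry.cn-hangzhou.aliyuncs.com/hadoopcluster/hadoop:v3.1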