@tsing1226
2016-03-24T08:29:40.000000Z
hadoop
Environment and required software:
jdk-7u67-linux-x64.tar.gz
hadoop-2.5.0.tar.gz
CentOS 6.4 x86_64
Add a user
adduser grc
passwd grc
Grant the regular user sudo privileges (edit as root):
vi /etc/sudoers
Add the following on the first line:
grc ALL=(root)NOPASSWD:ALL
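To confirm the entry works (a quick optional check), switch to the grc user and run a command through sudo; it should succeed without a password prompt:
su - grc
sudo whoami
# expected output: root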
Set the hostname:
sudo hostname hadoop-senior02.grc.com
sudo vi /etc/sysconfig/network
HOSTNAME=hadoop-senior02.grc.com
sudo vi /etc/hosts
192.168.2.102 hadoop-senior02.grc.com hadoop-senior02
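A quick way to verify the hostname and the hosts entry (optional check):
hostname
# expected: hadoop-senior02.grc.com
ping -c 1 hadoop-senior02.grc.com
# should resolve to 192.168.2.102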
Disable SELinux
sudo vi /etc/sysconfig/selinux
SELINUX=disabled
Permanently disable the firewall
sudo chkconfig iptables off
Check whether the firewall is stopped:
sudo service iptables status
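SELinux can be checked the same way (optional; the SELINUX=disabled setting only takes full effect after a reboot):
getenforce
# expected after reboot: Disabled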
Uninstall the bundled OpenJDK (check what is installed, then remove it):
rpm -qa | grep java
sudo rpm -e --nodeps java-1.6.0-openjdk-1.6.0.0-1.50.1.11.5.el6_3.x86_64 tzdata-java-2012j-1.el6.noarch java-1.7.0-openjdk-1.7.0.9-2.3.4.1.el6_3.x86_64
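Re-running the query should now print nothing, confirming the bundled JDK is gone:
rpm -qa | grep java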
Create working directories
sudo mkdir /opt/modules
sudo mkdir /opt/datas
sudo mkdir /opt/softwares
sudo mkdir /opt/tools
Change ownership of the directories
sudo chown -R grc:grc /opt/*
- Extract the JDK
tar -zxf jdk-7u67-linux-x64.tar.gz -C /opt/modules/
- sudo vi /etc/profile
#JAVA_HOME
export JAVA_HOME=/opt/modules/jdk1.7.0_67
export PATH=$JAVA_HOME/bin:$PATH
source /etc/profile
Verify that the JDK installed successfully:
[grc@hadoop-senior02 softwares]$ java -version
java version "1.7.0_67"
Java(TM) SE Runtime Environment (build 1.7.0_67-b01)
Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
Software required to build Hadoop from source:
- jdk: jdk-7u67-linux-x64.tar.gz
- hadoop: hadoop-2.5.0-src.tar.gz
- maven: apache-maven-3.0.5-bin.tar.gz
- protobuf: protobuf-2.5.0.tar.gz
- CentOS 6.4 x86_64
The NetEase (163) yum mirror is one of the best mirrors available in China, both in speed and in package versions. Switching the yum source to the 163 mirror speeds up package installation and updates, and avoids failures where common package versions cannot be found.
Setup steps
First, back up /etc/yum.repos.d/CentOS-Base.repo:
sudo mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
Download the repo file for your CentOS version and place it in /etc/yum.repos.d/ (back up first, as above):
CentOS5: http://mirrors.163.com/.help/CentOS5-Base-163.repo
CentOS6: http://mirrors.163.com/.help/CentOS6-Base-163.repo
Run the following commands to rebuild the yum cache:
sudo yum clean all
sudo yum makecache
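As an optional sanity check, the enabled repositories should now point at mirrors.163.com:
yum repolist -v | grep -i baseurl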
Install the build dependencies:
sudo yum install maven svn ncurses-devel gcc* lzo-devel zlib-devel snappy snappy-devel autoconf automake libtool cmake openssl-devel
Extract and install Ant:
tar -zxf apache-ant-1.9.6-bin.tar.gz -C /opt/modules/
- sudo vi /etc/profile
##ANT_HOME
export ANT_HOME=/opt/modules/apache-ant-1.9.6
export PATH=$PATH:$ANT_HOME/bin
- source /etc/profile
- ant -version
- Extract and install FindBugs
tar -zxf findbugs-1.3.9.tar.gz -C /opt/modules
- sudo vi /etc/profile
##FINDBUGS_HOME
export FINDBUGS_HOME=/opt/modules/findbugs-1.3.9
export PATH=$PATH:$FINDBUGS_HOME/bin
source /etc/profile
findbugs -version
* Extract and install Protobuf
tar -zxf protobuf-2.5.0.tar.gz -C /opt/modules/
cd /opt/modules/protobuf-2.5.0
./configure --prefix=/usr/local/protobuf
make && make check && make install
NOTE: these commands must be run as root (highest privileges); otherwise the install will fail!
- sudo vi /etc/profile
export PATH=$PATH:/usr/local/protobuf/bin/
export PKG_CONFIG_PATH=/usr/local/protobuf/lib/pkgconfig/
source /etc/profile
protoc --version
- Extract and install Maven
tar -zxf apache-maven-3.0.5-bin.tar.gz -C /opt/modules/
- sudo vi /etc/profile
## MAVEN_HOME
export MAVEN_HOME=/opt/modules/apache-maven-3.0.5
export PATH=$MAVEN_HOME/bin:$PATH
source /etc/profile
mvn -version
Apache Maven 3.0.5 (r01de14724cdef164cd33c7c8c2fe155faf9602da; 2013-02-19 05:51:28-0800)
Maven home: /opt/modules/apache-maven-3.0.5
Java version: 1.7.0_67, vendor: Oracle Corporation
Java home: /opt/modules/jdk1.7.0_67/jre
Default locale: en_US, platform encoding: UTF-8
OS name: "linux", version: "2.6.32-358.el6.x86_64", arch: "amd64", family: "unix"
Configure a mirror for the Maven central repository
- vim /opt/modules/apache-maven-3.0.5/conf/settings.xml
Add the following inside the <mirrors> section:
<mirror>
<id>nexus-osc</id>
<mirrorOf>*</mirrorOf>
<name>Nexus osc</name>
<url>http://maven.oschina.net/content/groups/public/</url>
</mirror>
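To confirm Maven picks the mirror up (an optional check, not part of the original steps), print the effective settings and look for the mirror block:
mvn help:effective-settings | grep -A 4 '<mirror>'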
From the hadoop-2.5.0-src directory, build the distribution with native libraries and Snappy support:
mvn clean package -DskipTests -Pdist,native -Dtar -Dsnappy.lib=/usr/lib64 -Dbundle.snappy
- NOTE: /usr/lib64 is the directory where snappy is installed.
- After the build finishes, the distribution is under hadoop-2.5.0-src/hadoop-dist/target/hadoop-2.5.0
cd hadoop-dist/target/
tar -zxf hadoop-2.5.0.tar.gz -C /opt/modules/
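Before touching the configuration, a quick look at the extracted tree confirms the native libraries were bundled (optional; bin/hadoop checknative below is the authoritative check):
ls /opt/modules/hadoop-2.5.0/lib/native
# should include libhadoop.so* and libsnappy.so*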
- core-site.xml
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec
</value>
</property>
- mapred-site.xml
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
- $bin/hadoop checknative
[grc@hadoop-senior02 hadoop-2.5.0]$ bin/hadoop checknative
16/01/14 23:58:54 INFO bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
16/01/14 23:58:54 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
Native library checking:
hadoop: true /opt/modules/hadoop-2.5.0/lib/native/libhadoop.so.1.0.0
zlib: true /lib64/libz.so.1
snappy: true /opt/modules/hadoop-2.5.0/lib/native/libsnappy.so.1
lz4: true revision:99
bzip2: true /lib64/libbz2.so.1
- In hadoop-env.sh, yarn-env.sh, and mapred-env.sh, set:
export JAVA_HOME=/opt/modules/jdk1.7.0_67
- core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-senior02.grc.com:8020</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>grc</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/modules/hadoop-2.5.0/data/tmp</value>
</property>
- Create the data directory (run from the Hadoop home, /opt/modules/hadoop-2.5.0)
mkdir -p data/tmp
- slaves
hadoop-senior02.grc.com
- hdfs-site.xml
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-senior02.grc.com:50090</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
bin/hdfs namenode -format
sbin/hadoop-daemon.sh start namenode
sbin/hadoop-daemon.sh start datanode
jps
* Create a directory on HDFS
bin/hdfs dfs -mkdir -p tmp
- Upload files
bin/hdfs dfs -put etc/hadoop/*site.xml tmp
- List the uploaded files
bin/hdfs dfs -ls tmp
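Because dfs.webhdfs.enabled is set to true above, the same listing is also available over the WebHDFS REST API (optional check; 50070 is the default NameNode web port in Hadoop 2.5, and /user/grc is the assumed HDFS home directory of the grc user):
curl -i "http://hadoop-senior02.grc.com:50070/webhdfs/v1/user/grc/tmp?op=LISTSTATUS"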
cp mapred-site.xml.template mapred-site.xml
- mapred-site.xml
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-senior02.grc.com:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop-senior02.grc.com:19888</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
- yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-senior02.grc.com</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>106800</value>
</property>
sbin/yarn-daemon.sh start resourcemanager
sbin/yarn-daemon.sh start nodemanager
sbin/mr-jobhistory-daemon.sh start historyserver
- Check the running processes
jps
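With all daemons started, jps should print something like the following (PIDs will differ):
3889 NameNode
3967 DataNode
4275 ResourceManager
4372 NodeManager
4496 JobHistoryServer
4588 Jps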
- Create the wc.txt file (in /opt/datas)
touch wc.txt
- vi wc.txt
HADOOP hadoop
mapreduce yarn
hivehbase
- Create the wordcount input directory on HDFS
bin/hdfs dfs -mkdir -p wordcount/input
* Run wordcount
$ bin/hdfs dfs -put /opt/datas/wc.txt wordcount/input
bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.0.jar wordcount wordcount/input wordcount/output
- View the results
bin/hdfs dfs -text wordcount/output/p*
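Since yarn.log-aggregation-enable is true, the job's container logs can be pulled back after it finishes (optional; the application id below is a placeholder, substitute the real one):
bin/yarn application -list -appStates FINISHED
# note the Application-Id column, e.g. application_1453000000000_0001 (placeholder)
bin/yarn logs -applicationId application_1453000000000_0001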
Set up passwordless SSH login:
ssh-keygen -t rsa
ssh-copy-id hadoop-senior02.grc.com
ssh hadoop-senior02.grc.com
Stop all daemons:
sbin/stop-dfs.sh
sbin/stop-yarn.sh
sbin/mr-jobhistory-daemon.sh stop historyserver
References:
1. http://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-common/core-default.xml
2. http://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
3. http://hadoop.apache.org/docs/r2.5.2/hadoop-yarn/hadoop-yarn-common/yarn-default.xml
4. http://hadoop.apache.org/docs/r2.5.2/hadoop-project-dist/hadoop-common/SingleCluster.html
5. http://www.micmiu.com/bigdata/hadoop/hadoop-snappy-install-config/
6. http://my.oschina.net/u/1428349/blog/313646?fromerr=sx4pDz6V