
# view your present working directory
$ pwd
# list the contents of the home directory
$ ls -ltr
# extract the binary install archive
$ tar xvzf hadoop-2.7.2.tar.gz
# list the contents again to confirm the extracted hadoop-2.7.2 directory
$ ls -ltr
# generate an RSA key pair with an empty passphrase (for passwordless SSH)
$ ssh-keygen -t rsa -P ""
# add the public key to the trust store (authorized_keys)
$ cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
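# verify passwordless SSH into the local host (assumes sshd is running;
# the first connection will prompt you to accept the host key)
$ ssh localhost
$ exit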
# navigate to the configuration directory
$ cd hadoop-2.7.2/etc/hadoop
# edit the following configuration files as shown below:
# 1. core-site.xml
# 2. hdfs-site.xml
# 3. yarn-site.xml
# 4. mapred-site.xml

$ nano core-site.xml
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://ubuntu.dhcp.blrl.sap.corp:9000</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
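# optional sanity check, once the hadoop binaries are on the PATH (or run
# from the hadoop root): getconf prints the value Hadoop resolves from
# core-site.xml; fs.default.name is the deprecated alias of fs.defaultFS
$ bin/hdfs getconf -confKey fs.default.name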

$ nano hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>file:///home/user1/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>file:///home/user1/hdfs/datanode</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
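# create the local directories referenced above; the paths must match the
# dfs.name.dir / dfs.data.dir values (user1's home directory, as above)
$ mkdir -p /home/user1/hdfs/namenode /home/user1/hdfs/datanode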

$ nano yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <!-- a property takes a single value; use localhost if the FQDN does not resolve -->
    <value>ubuntu.dhcp.blrl.sap.corp</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
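# the stock 2.7.2 tarball ships only mapred-site.xml.template; if
# mapred-site.xml does not exist yet, copy the template first
$ cp mapred-site.xml.template mapred-site.xml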
$ nano mapred-site.xml
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>ubuntu.dhcp.blrl.sap.corp:9001</value>
  </property>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
# update the hadoop environment to point to the correct JDK
$ nano hadoop-env.sh
export JAVA_HOME="/usr/local/java/jdk1.8.0_65"

# update the following environment variables
$ nano ~/.bashrc   # for root and each Hadoop user
# JDK 1.8
export JAVA_HOME=/usr/local/java/jdk1.8.0_65
export PATH=$PATH:$HOME/bin:$JAVA_HOME/bin
# Hadoop 2.7.2
export HADOOP_INSTALL=/home/user1/hadoop-2.7.2
export PATH=$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib -Djava.net.preferIPv4Stack=true"
export YARN_OPTS="-Djava.net.preferIPv4Stack=true"
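# reload the shell environment and confirm the hadoop binaries resolve
$ source ~/.bashrc
$ hadoop version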
# navigate to hadoop root directory
$ cd /home/user1/hadoop-2.7.2
# format a new distributed filesystem (HDFS)
$ bin/hdfs namenode -format
# the above command initializes the following namenode metadata directory
$ ls -ltr $HOME/hdfs/namenode/current
# start / stop HDFS with the following commands:
$ sbin/start-dfs.sh
$ sbin/stop-dfs.sh
# display all running Java processes
$ jps
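# after start-dfs.sh, jps should report roughly the following daemons
# (process ids will differ):
#   NameNode
#   DataNode
#   SecondaryNameNode
#   Jps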
# Test HDFS: create a directory, list and delete it using the following commands:
$ bin/hadoop fs -mkdir /testing123
$ bin/hadoop fs -ls /testing123
$ bin/hadoop fs -rmdir /testing123
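# optional round trip: copy a local file into HDFS and read it back
# (/etc/hosts is only an example; any small local file works)
$ bin/hadoop fs -mkdir /testing123
$ bin/hadoop fs -put /etc/hosts /testing123/
$ bin/hadoop fs -cat /testing123/hosts
$ bin/hadoop fs -rm -r /testing123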

# --------------------------------------------------------------------------------------------------------------------------
# NameNode URL: http://ubuntu.dhcp.blrl.sap.corp:50070/
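# because dfs.webhdfs.enabled is true, HDFS can also be queried over the
# WebHDFS REST API; a quick check (assumes curl is installed):
$ curl "http://ubuntu.dhcp.blrl.sap.corp:50070/webhdfs/v1/?op=LISTSTATUS"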


# --------------------------------------------------------------------------------------------------------------------------
# start / stop YARN with the following commands:
$ sbin/start-yarn.sh
$ sbin/stop-yarn.sh
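# with YARN running, the bundled examples jar gives a quick end-to-end
# check (path is relative to the hadoop root of the 2.7.2 tarball)
$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar pi 2 5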
# --------------------------------------------------------------------------------------------------------------------------
# ResourceManager URL: http://ubuntu.dhcp.blrl.sap.corp:8088/
# NodeManager URL: http://ubuntu.dhcp.blrl.sap.corp:8042
# --------------------------------------------------------------------------------------------------------------------------
# start hadoop services individually
$ sbin/hadoop-daemon.sh start namenode
$ sbin/hadoop-daemon.sh start datanode
$ sbin/yarn-daemon.sh start resourcemanager
$ sbin/yarn-daemon.sh start nodemanager
$ sbin/mr-jobhistory-daemon.sh start historyserver
$ jps

# stop hadoop services individually
$ sbin/hadoop-daemon.sh stop namenode
$ sbin/hadoop-daemon.sh stop datanode
$ sbin/yarn-daemon.sh stop resourcemanager
$ sbin/yarn-daemon.sh stop nodemanager
$ sbin/mr-jobhistory-daemon.sh stop historyserver
$ jps
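# if a daemon is missing from the jps output, check its log file; by
# default HDFS daemons log to logs/hadoop-<user>-<daemon>-<hostname>.log
# and YARN daemons to logs/yarn-<user>-<daemon>-<hostname>.log
$ tail -n 50 logs/hadoop-user1-namenode-*.log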
