Professional Documents
Culture Documents
Install java
chandu@chandu-Inspiron-N5010:~$ sudo vim /etc/profile
JAVA_HOME=/usr/local/java/jdk1.8.0_111
JRE_HOME=$JAVA_HOME/jre
PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
export JAVA_HOME
export JRE_HOME
export PATH
chandu@chandu-Inspiron-N5010:~$ . /etc/profile
chandu@chandu-Inspiron-N5010:~$ jps
26824 Jps
chandu@chandu-Inspiron-N5010:~$ javac -version
javac 1.8.0_111
Installing Java automatically from the Ubuntu package repositories
chandu@chandu-Inspiron-N5010:~$ sudo apt-get install <java-package-name>   (e.g. openjdk-8-jdk)
chandu@chandu-Inspiron-N5010:~$ java -version
Path of java
chandu@chandu-Inspiron-N5010:~$ gedit ~/.bashrc
JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
PATH=$PATH:$JAVA_HOME/bin
Create group
root@chandu-Inspiron-N5010:Home/chandu# sudo addgroup hadoop
Create user
root@chandu-Inspiron-N5010:Home/chandu# sudo adduser hduser
Add hduser to group hadoop
root@chandu-Inspiron-N5010:Home/chandu# sudo adduser hduser hadoop
Add hduser to sudoer list
root@chandu-Inspiron-N5010:Home/chandu# sudo visudo
#allow members of group sudo to execute any command
hduser ALL=(ALL) ALL
ctrl+x
Yes and enter
Generate key
hduser@chandu-Inspiron-N5010:~$ ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
hduser@chandu-Inspiron-N5010:~$ cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
hduser@chandu-Inspiron-N5010:~$ chmod 600 $HOME/.ssh/authorized_keys   (600 for the key file; 700 applies to the ~/.ssh directory itself)
If the SSH connect should fail, these general tips might help:
Enable debugging with ssh -vvv localhost and investigate the error in detail.
Check the SSH server configuration in /etc/ssh/sshd_config, in particular the options
PubkeyAuthentication (which should be set to yes) and AllowUsers (if this option is
active, add the hduser user to it). If you made any changes to the SSH server
configuration file, you can force a configuration reload with sudo /etc/init.d/ssh reload.
-Hadoop Installation
hduser@chandu-Inspiron-N5010:~$ cd ~/Desktop
hduser@chandu-Inspiron-N5010:~/Desktop$ sudo mv hadoop-2.7.3.tar.gz /usr/local/
hduser@chandu-Inspiron-N5010:~$ cd /usr/local/
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo tar -xvzf hadoop-2.7.3.tar.gz
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo ln -s hadoop-2.7.3 hadoop
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo chown -R hduser:hadoop hadoop-2.7.3
/usr/local/hadoop
Changing permission to the folder
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo chmod 777 hadoop-2.7.3
hduser@chandu-Inspiron-N5010:~$ cd /usr/local/hadoop/etc/hadoop/
hduser@chandu-Inspiron-N5010:/usr/local/hadoop/etc/hadoop$
vim hadoop-env.sh
press i
/usr/local/java/jdk1.8.0_111--------------------------------java path
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true
export HADOOP_HOME_WARN_SUPPRESS="TRUE"
export JAVA_HOME=/usr/local/java/jdk1.8.0_111
LD_LIBRARY_PATH=/usr/local/hadoop/lib/native/:$LD_LIBRARY_PATH
Esc
:wq
hduser@chandu-Inspiron-N5010:~$ vim ~/.bashrc
# Native Path
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later
on)
export JAVA_HOME=/usr/local/java/jdk1.8.0_111
vim yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>localhost</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
hduser@laptop:~$ cd /usr/local/hadoop/etc/hadoop/
vim core-site.xml
<property>
<name>hadoop.tmp.dir</name>
<value>/app/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:54310</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
hduser@laptop:~$ cd /usr/local/hadoop/etc/hadoop/
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>localhost:54311</value>
<description>The host and port that the MapReduce job
tracker runs at. If "local", then jobs are run in-process as a
single map
and reduce task.
</description>
</property>
hduser@laptop:~$ cd /usr/local/hadoop/etc/hadoop/
vim hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/datanode</value>
</property>
hdfs namenode -format   (one-time only, before the very first start; reformatting erases HDFS data)
start-dfs.sh
start-yarn.sh
mr-jobhistory-daemon.sh start historyserver
jps
NameNode web UI: http://localhost:50070
ResourceManager web UI: http://localhost:8088
JobHistory server web UI: http://localhost:19888
Pig installation
hduser@chandu-Inspiron-N5010:~/Desktop$ sudo mv pig-0.16.0.tar.gz /usr/local/
hduser@chandu-Inspiron-N5010:~$ cd /usr/local/
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo tar -xvzf pig-0.16.0.tar.gz
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo ln -s pig-0.16.0 pig
Hive installation
hduser@chandu-Inspiron-N5010:~/Desktop$ sudo mv apache-hive-2.1.0-bin.tar.gz /usr/local/
hduser@chandu-Inspiron-N5010:~$ cd /usr/local/
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo tar -xvzf apache-hive-2.1.0-bin.tar.gz
hduser@chandu-Inspiron-N5010:/usr/local/$ sudo ln -s apache-hive-2.1.0-bin hive
Add hive path
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin
On all daemons
---------------------This will create warehouse folder for us.
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir -p /tmp/hive
hdfs dfs -chmod 777 /tmp
hdfs dfs -chmod 777 /user/hive/warehouse
hdfs dfs -chmod 777 /tmp/hive
sudo rm /home/hduser/ecosystem/hive/lib/log4j-slf4j-impl.jar
Initialize the database to be used with hive
From 2.1.0 version onward we have to tell which type of database we are
using