-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHadoop Multi Node
155 lines (110 loc) · 3.13 KB
/
Hadoop Multi Node
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
-----*** Upload .pem key to each node ***-----
$ scp -i abc.pem abc.pem ubuntu@<pub-ip>:/home/ubuntu/.ssh
>> do this for all nodes
$ ssh -i abc.pem ubuntu@<private-ip of any node>
-----*** Adding hosts to NN SNN JT and DN's ***-----
$ sudo nano /etc/hosts
172.31.23.8 ip-172-31-23-8.eu-west-1.compute.internal nn
172.31.23.7 ip-172-31-23-7.eu-west-1.compute.internal snn
172.31.23.10 ip-172-31-23-10.eu-west-1.compute.internal jt
172.31.23.9 ip-172-31-23-9.eu-west-1.compute.internal 1dn
172.31.23.11 ip-172-31-23-11.eu-west-1.compute.internal 2dn
172.31.23.12 ip-172-31-23-12.eu-west-1.compute.internal 3dn
>> do this on all nodes (note: each node must have its own unique private IP — substitute the actual IPs of your instances; two datanodes cannot share one host)
-----*** Configure .profile ***-----
$ nano .profile
eval `ssh-agent`
ssh-add /home/ubuntu/.ssh/abc.pem
$ source .profile
>>> scp .profile to each node
-----*** Install dsh ***-----
$ sudo apt-get update
$ sudo apt install dsh -y
>>>> Edit machines.list for dsh
$ sudo nano /etc/dsh/machines.list
#localhost
nn
snn
jt
1dn
2dn
3dn
$ dsh -a uptime
$ dsh -a sudo apt-get update
-----*** Install Java ***-----
$ dsh -a sudo apt install openjdk-8-jdk -y
$ dsh -a java -version
-----*** Download Hadoop ***-----
$ dsh -a wget -c https://archive.apache.org/dist/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz
$ dsh -a tar -xzvf hadoop-1.2.1.tar.gz
$ dsh -a sudo mv hadoop-1.2.1 /usr/local/hadoop
-----*** Edit .bashrc ***-----
nano .bashrc
export HADOOP_PREFIX=/usr/local/hadoop/
export PATH=$PATH:$HADOOP_PREFIX/bin
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$PATH:$JAVA_HOME
>>> scp .bashrc to all nodes
$ scp .bashrc ubuntu@snn:~
$ dsh -a source .bashrc
>> note: 'source' is a shell builtin, so this has no lasting effect over dsh — new login shells pick up .bashrc automatically; run 'source .bashrc' locally on each node if needed
-----*** Configuring xml's ***-----
$ cd /usr/local/hadoop/conf
$ nano hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true
$ nano core-site.xml
<property>
<name>fs.default.name</name>
<value>hdfs://nn:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>
$ nano hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
$ nano mapred-site.xml
<property>
<name>mapred.job.tracker</name>
<value>hdfs://jt:9001</value>
</property>
-----*** Configure masters and slaves ***-----
$ nano masters
#localhost
snn
$ nano slaves
#localhost
1dn
2dn
3dn
>>> on SNN
$ ssh snn
nano /usr/local/hadoop/conf/masters
#localhost
>>> on JT
$ ssh jt
nano /usr/local/hadoop/conf/masters
#localhost
jt
-----*** scp configurations xml's and slaves to all nodes ***-----
$ cd /usr/local/hadoop/conf/
$ scp hadoop-env.sh core-site.xml hdfs-site.xml mapred-site.xml slaves ubuntu@snn:/usr/local/hadoop/conf/
Same for all nodes
----*** Creating tmp directory (make sure you are on NN) ***-----
$ dsh -a mkdir /usr/local/hadoop/tmp
-----*** Exec bash ***-----
$ dsh -a exec bash
-----*** Formatting namenode(make sure you are on NN) ***-----
$ hadoop namenode -format
-----*** starting dfs daemons(from NN) ***-----
$ start-dfs.sh
-----*** Starting mapred daemons (make sure you are on JT) ***-----
$ start-mapred.sh
-----*** Check java process ***-----
$ dsh -a jps