<!DOCTYPE html>
<!--[if IEMobile 7 ]><html class="no-js iem7"><![endif]-->
<!--[if lt IE 9]><html class="no-js lte-ie8"><![endif]-->
<!--[if (gt IE 8)|(gt IEMobile 7)|!(IEMobile)|!(IE)]><!--><html class="no-js" lang="en"><!--<![endif]-->
<head>
<meta charset="utf-8">
<title>application melding</title>
<meta name="author" content="Robert Rati">
<meta name="description" content="In my previous posts here, here, and here I detailed how to create
docker images with hadoop installed and pre-configured as well as how they
work. …">
<!-- http://t.co/dKP3o1e -->
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="canonical" href="http://rrati.github.com/">
<link href="/favicon.png" rel="icon">
<link href="/stylesheets/screen.css" media="screen, projection" rel="stylesheet" type="text/css">
<script src="/javascripts/modernizr-2.0.js"></script>
<script src="/javascripts/ender.js"></script>
<script src="/javascripts/octopress.js" type="text/javascript"></script>
<link href="/atom.xml" rel="alternate" title="application melding" type="application/atom+xml">
<!--Fonts from Google's Web font directory at http://google.com/webfonts -->
<link href="http://fonts.googleapis.com/css?family=PT+Serif:regular,italic,bold,bolditalic" rel="stylesheet" type="text/css">
<link href="http://fonts.googleapis.com/css?family=PT+Sans:regular,italic,bold,bolditalic" rel="stylesheet" type="text/css">
</head>
<body >
<header role="banner"><hgroup>
<h1><a href="/">application melding</a></h1>
<h2>making square pegs fit in round holes</h2>
</hgroup>
</header>
<nav role="navigation"><ul class="subscription" data-subscription="rss">
<li><a href="/atom.xml" rel="subscribe-rss" title="subscribe via RSS">RSS</a></li>
</ul>
<form action="http://google.com/search" method="get">
<fieldset role="search">
<input type="hidden" name="q" value="site:rrati.github.com" />
<input class="search" type="text" name="q" results="0" placeholder="Search"/>
</fieldset>
</form>
<ul class="main-navigation">
<li><a href="/">Blog</a></li>
<li><a href="/blog/archives">Archives</a></li>
</ul>
</nav>
<div id="main">
<div id="content">
<div class="blog-index">
<article>
<header>
<h1 class="entry-title"><a href="/blog/2014/05/09/apache-hadoop-plus-docker-plus-fedora-issues-and-limitations/">Apache Hadoop + Docker + Fedora: Issues and Limitations</a></h1>
<p class="meta">
<time datetime="2014-05-09T07:26:34-04:00" pubdate data-updated="true">May 9<span>th</span>, 2014</time>
</p>
</header>
<div class="entry-content"><p>In my previous posts <a href="http://rrati.github.io/blog/2014/05/06/apache-hadoop-plus-docker-plus-fedora-building-images">here</a>, <a href="http://rrati.github.io/blog/2014/05/07/apache-hadoop-plus-docker-plus-fedora-running-images">here</a>, and <a href="http://rrati.github.io/blog/2014/05/08/apache-hadoop-plus-docker-plus-fedora-why-it-works">here</a> I detailed how to create
docker images with hadoop installed and pre-configured as well as how they
work. While it is neat that this can be done, its usefulness is somewhat
limited because the containers will all need to run on a single host. You
can’t scale to a very useful cluster with all the resources running on a single
machine. The limitations come mostly from docker’s networking capability. I
used docker version 0.9 in Fedora 20 for my testing.</p>
<h2>Docker Networking Limitations</h2>
<p>In docker 0.9 the networking support is rather limited. You can find a more
detailed explanation of how it functions pretty easily elsewhere, but in a
nutshell docker containers are given IP addresses in a subnet determined by
the IP address of the bridge interface docker will use. IP addresses are
assigned depending on the order the containers are started, so the only way to
ensure containers get the same IP address is to ensure the number of containers
running on a host is always the same and the containers are always started in
the same order. If the IP address of the bridge interface is changed then you
won’t get the same IP addresses no matter what.</p>
<p>Docker also doesn’t allow a container to talk on the host network directly like
you can in bridge networking mode with most virtualization software. This
means there’s no way for a container to talk to a real DHCP server and be
managed and accessible like physical machines. This also means there’s no way
to directly access a container from outside the host machine. DNS becomes
difficult if not useless altogether.</p>
<p>The only means docker provides for external access to a container is port
forwarding. Docker accomplishes this behind the scenes by adding forwarding
rules into the host machine’s iptables rules for each port forwarded. This can
get messy if you have many ports being forwarded to your containers.</p>
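<p>If you want to see what docker has added on the host, you can inspect the NAT
table directly (this is just an inspection command; the exact chains and rules
vary by docker version):</p>
<pre><code>iptables -t nat -L -n -v
</code></pre>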
<p>If your application needs DNS you’re pretty much hosed as far as I can see.</p>
<h2>Cross-Host Networking with Containers</h2>
<p>Is all hope lost for containers communicating across host machines? Luckily,
no. Despite these limitations, there are some tools and methods that allow
containers to communicate with containers running on other hosts. They all
involve manipulating iptables in combination with special setup. There are a
number of documented approaches to solve this problem. The interesting ones
I’ve found are:</p>
<ul>
<li><a href="http://goldmann.pl/blog/2014/01/21/connecting-docker-containers-on-multiple-hosts">Open vSwitch</a></li>
<li><a href="https://github.com/jpetazzo/pipework">pipework</a></li>
<li><a href="http://blog.codeaholics.org/2013/giving-dockerlxc-containers-a-routable-ip-address">Virutal Interfaces</a></li>
<li><a href="https://github.com/openshift/geard">geard</a></li>
<li><a href="http://objectiveoriented.com/devops/2014/02/15/docker-io-service-discovery-your-network-and-how-to-make-it-work">Service Discovery with Skydock and Skydns</a></li>
</ul>
<p>The two approaches from that list that seem most interesting to me are geard
and the virtual interfaces approach. geard is much more than just iptables
manipulation and its tool chain looks to make managing containers easier. The
virtual interfaces approach is the closest I’ve seen to docker doing bridge
networking used by other virtualization technologies. Which approach you use
probably depends upon what your use case will be. For hadoop I plan to try out
geard and disabling DNS lookups in the namenode.</p>
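<p>To give a flavor of the virtual-interfaces style, pipework attaches an extra
interface to a running container and puts it on a bridge you control. The
invocation below is illustrative only; the bridge name, container name, and
address are placeholders rather than part of my setup:</p>
<pre><code>pipework br1 datanode1 192.168.1.10/24
</code></pre>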
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2014/05/08/apache-hadoop-plus-docker-plus-fedora-why-it-works/">Apache Hadoop + Docker + Fedora: Why It Works</a></h1>
<p class="meta">
<time datetime="2014-05-08T09:39:10-04:00" pubdate data-updated="true">May 8<span>th</span>, 2014</time>
</p>
</header>
<div class="entry-content"><p>In two previous posts <a href="http://rrati.github.io/blog/2014/05/06/apache-hadoop-plus-docker-plus-fedora-building-images">here</a> and <a href="http://rrati.github.io/blog/2014/05/07/apache-hadoop-plus-docker-plus-fedora-running-images">here</a> I’ve outlined how to get hadoop
running in docker containers, and along the way I’ve left some important
details unexplained. Well, now I’ll explain them.</p>
<h2>Startup Order</h2>
<p>The container startup order mentioned in my post <a href="http://rrati.github.io/blog/2014/05/07/apache-hadoop-plus-docker-plus-fedora-running-images">here</a> is required because some
hadoop daemons require other daemons to be up before they will start. The
image that really dictates the start order is the datanode. That image will
launch a datanode and a yarn node manager. The datanode will try to communicate
with the namenode, and the node manager will try to communicate with the
resource manager. When either the datanode or the node manager is unable to
communicate with its respective dependency, it exits instead of retrying.
That could possibly be mitigated by configuring supervisord to restart the
daemon if it exits, but you could still end up in a race condition to get the
containers started.</p>
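<p>For anyone who wants to experiment with that mitigation, supervisord can
restart a program when it exits. A minimal sketch of the relevant options
follows; the program name and command are placeholders, not the actual
configuration shipped in the images:</p>
<pre><code>[program:datanode]
command=/usr/bin/hdfs datanode
autorestart=true
startretries=10
startsecs=5
</code></pre>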
<h2>Hostnames</h2>
<p>So why the specific hostnames in DNS? That has more to do with how hadoop is
configured inside the images. If you look at the docker hadoop configuration
files provided by Scott Collier, they use hostnames instead of localhost or
static IP addresses to identify the hosts running the important components,
namely the namenode and the resource manager.</p>
<p>In the core-site.xml you can see where the namenode is defined:</p>
<pre><code><property>
<name>fs.default.name</name>
<value>hdfs://namenode:8020</value>
</property>
</code></pre>
<p>which is what imposes the requirement for the hostname of the namenode. In the
yarn-site.xml you’ll find this:</p>
<pre><code><property>
<name>yarn.resourcemanager.hostname</name>
<value>resourcemgr</value>
</property>
</code></pre>
<p>And that imposes the hostname of the resource manager. The hostnames for any
datanodes aren’t imposed by anything in the configuration. I use datanode#
for simplicity, but they could be anything so long as you use a hostname on
container startup that you have set up in dnsmasq.</p>
<p>When I launch the containers, I give them hostnames via the -h option. As
an example, the beginning of the line that I used to launch the namenode is:</p>
<pre><code>docker run -d -h namenode ...
</code></pre>
<p>Here I am giving the container the hostname namenode. If you want, you can
modify the configuration files that will be added to the images to use any
hostname scheme you like, build the image(s), and then launch them with the
appropriate hostname.</p>
<h2>Port Mapping</h2>
<p>I didn’t mention this earlier, but the running containers have their web portals
accessible to anything that can contact the host machine. If you browse to
port 50070 on the host machine, you’ll see the namenode’s web portal. This is
achieved by the port mapping option(s) I used when I started the containers.
Using the namenode as an example, the relevant part of the command I used is:</p>
<pre><code>docker run -d ... -p 50070:50070 ...
</code></pre>
<p>The -p is doing the port mapping. It is mapping port 50070 on the host machine
to port 50070 on the container. This also means host port 50070 can’t be
mapped to any other container. What this means for the hadoop
containers is that I can’t launch 2 datanode images with the same ports mapped
on the same host. To get around that, I usually launch the datanodes with
this command:</p>
<pre><code>docker run -d -h datanode1 --dns <dns_ip> -p 50075 -p 8042 rrati/hadoop-datanode
</code></pre>
<p>This tells docker to map an ephemeral port on the host machine
to 50075 on the container and another ephemeral port on the host machine to
8042 on the container. I discover which ephemeral ports are used by running:</p>
<pre><code>docker ps
</code></pre>
<p>In the output you will see something like:</p>
<pre><code>de33e77a7d73 rrati/hadoop-datanode:latest supervisord -n 6 seconds ago Up 4 seconds 0.0.0.0:62000->50075/tcp, 0.0.0.0:62001->8042/tcp, 45454/tcp, 50010/tcp, 50020/tcp, 50475/tcp, 8010/tcp, 8040/tcp angry_bohr
</code></pre>
<p>So to access the datanode web portal I would access port 62000 on the host
machine, and to access the node manager web portal I would access port 62001.</p>
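<p>You can also ask docker for a single mapping instead of reading through the
docker ps output:</p>
<pre><code>docker port de33e77a7d73 50075
</code></pre>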
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2014/05/07/apache-hadoop-plus-docker-plus-fedora-running-images/">Apache Hadoop + Docker + Fedora: Running Images</a></h1>
<p class="meta">
<time datetime="2014-05-07T09:08:27-04:00" pubdate data-updated="true">May 7<span>th</span>, 2014</time>
</p>
</header>
<div class="entry-content"><p>In a previous <a href="http://rrati.github.io/blog/2014/05/06/apache-hadoop-plus-docker-plus-fedora-building-images">post</a> I discussed how to generate docker images that include
a pre-configured and simple hadoop setup. Now it’s time to run them, and that
brings us to our first hurdle: DNS.</p>
<h2>Hadoop and DNS</h2>
<p>If you’ve ever tried to run a hadoop cluster without DNS you may not have
gotten very far. The namenode appears to use DNS lookups to verify datanodes
that try to connect to it. If DNS isn’t set up properly then the namenode will
never show any datanodes connected to it, and if you check the namenode logs
you’ll see an odd error like:</p>
<pre><code>error: DisallowedDatanodeException: Datanode denied communication with namenode: DatanodeRegistration(<ip|hostname>, storageID=DS-1141974467-172.17.0.14-50010-1393006490185, infoPort=50075, ipcPort=50020, storageInfo=lv=-47;cid=CID-555184a7-6958-41d3-96d2-d8bcc7211819;nsid=1734635015;c=0)
</code></pre>
<p>Using only IP addresses in the configuration files won’t solve the problem
either, as the namenode appears to do a reverse lookup for nodes that come in
with an IP instead of a hostname. Where’s the trust?</p>
<p>There are a few solutions to this depending on the version of hadoop that will
be installed in the image. Fedora 20 has hadoop 2.2.0 and will require a DNS
solution, which I’ll detail in just a bit. I’m working on updating hadoop to
2.4.0 for Fedora 21 and that has a configuration option that was introduced in
2.3.0 that may allow you to disable reverse DNS lookups from the namenode when
datanodes register. For hadoop 2.3.0 and beyond you may avoid the need to set
up a DNS server by adding the following snippet to the hdfs-site.xml config file:</p>
<pre><code><property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
</code></pre>
<p>If you need to set up a DNS server it isn’t too hard, but it does limit how
functional these containers can be since the hostname of the datanodes will
need to be resolvable in DNS. That’s not too bad for a local setup where IP
addresses can be easily controlled, but when you branch out to using multiple
physical hosts this can be a problem for a number of reasons. I’ll go through
some of the limitations in a later post, but for now I’ll go through using
dnsmasq to set up a local DNS server to get these containers functioning on a
single host.</p>
<h2>Setting Up Dnsmasq</h2>
<p>This is pretty well covered in the README, but I’ll cover it again here in a
little more detail. First we need to install dnsmasq:</p>
<pre><code>yum install dnsmasq
</code></pre>
<p>Next you’ll need to configure dnsmasq to listen on the bridge interface docker
will use. By default that is the interface docker0. To tell dnsmasq to listen
on that interface:</p>
<pre><code>echo "interface=docker0" >> /etc/dnsmasq.conf
</code></pre>
<p>Next we need to set up the forward and reverse resolvers. Create a 0hosts file
in /etc/dnsmasq.d and add these entries to it:</p>
<pre><code>address="/namenode/<namenode_ip>"
ptr-record=<reverse_namenode_ip>.in-addr.arpa,namenode
address="/resourcemgr/<resourcemgr_ip>"
ptr-record=<reverse_resourcemgr_ip>.in-addr.arpa,resourcemgr
address="/datanode1/<datanode_ip>"
ptr-record=<reverse_datanode_ip>.in-addr.arpa,datanode1
</code></pre>
<p>The hostnames for the namenode and resource manager are important if using
images generated from the Dockerfiles I pointed at earlier.</p>
<p>What IP addresses should you use? Well, that’s a slightly more complicated
answer than it seems because of how docker hands out IP addresses. I’m going
to use an example where the namenode is given the IP address 172.17.0.2, so
the DNS entries for the namenode with that IP address are:</p>
<pre><code>address="/namenode/172.17.0.2"
ptr-record=2.0.17.172.in-addr.arpa,namenode
</code></pre>
<p>If you want to add more datanodes to the pool you’ll obviously need to add
more entries to the DNS records, as in the sketch just below.</p>
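<p>A hypothetical second datanode would look like this (the IP address is only an
example, since the address a container actually gets depends on start order):</p>
<pre><code>address="/datanode2/172.17.0.5"
ptr-record=5.0.17.172.in-addr.arpa,datanode2
</code></pre>
<p>Now that we’ve got dnsmasq configured let’s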
start it:</p>
<pre><code>systemctl start dnsmasq
</code></pre>
<h2>Starting the Containers</h2>
<p>Now that we have DNS set up we can start some containers. As you might expect
there’s a catch here as well. The containers must be started in the following
order:</p>
<ol>
<li>namenode</li>
<li>resource manager</li>
<li>datanode(s)</li>
</ol>
<p>This startup order is imposed by the hadoop daemons and what they do when they
fail to contact another daemon they depend upon. In some instances I’ve seen
the daemons attempt to reconnect, and in others I’ve seen them just exit. The
surefire way to get everything up and running is to start the containers in the
order I provided.</p>
<p>To start the namenode, execute:</p>
<pre><code>docker run -d -h namenode --dns <dns_ip> -p 50070:50070 <username>/hadoop-namenode
</code></pre>
<p>What this command is doing is important so I’ll break it down piece by piece:</p>
<ul>
<li>-d: Run in daemon mode</li>
<li>-h: Give the container this hostname</li>
<li>--dns: Set this as the DNS server in the container. It should be the IP address of the router inside docker. This should always be the first IP address in the subnet determined by the bridge interface (see the snippet after this list).</li>
<li>-p: Map a port on the host machine to a port on the container</li>
</ul>
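<p>To find that first address on your host, read it off the bridge interface
(docker0 by default):</p>
<pre><code>ip addr show docker0
</code></pre>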
<p>For the containers and the DNS setup I’ve been detailing in my posts, using the
default docker bridge interface, I would execute:</p>
<pre><code>docker run -d -h namenode --dns 172.17.0.1 -p 50070:50070 rrati/hadoop-namenode
</code></pre>
<p>The resource manager and the datanode are started similarly:</p>
<pre><code>docker run -d -h resourcemgr --dns <dns_ip> -p 8088:8088 -p 8032:8032 <username>/hadoop-resourcemgr
docker run -d -h datanode1 --dns <dns_ip> -p 50075:50075 -p 8042:8042 <username>/hadoop-datanode
</code></pre>
<p>Make sure that the hostnames provided with the -h option match the hostnames
you setup in dnsmasq.</p>
<h2>Using External Storage</h2>
<p>This setup is using HDFS for storage, but that’s not going to do us much good
if everything in the namenode or a datanode is lost every time a container
is stopped. To get around that you can map directories into the container on
startup. This would allow the container to write data to a location that won’t
be destroyed when the container is shut down. To map a directory into the
namenode’s main storage location, you would execute:</p>
<pre><code>docker run -d -h namenode --dns 172.17.0.1 -p 50070:50070 -v <persistent_storage_dir>:/var/cache/hadoop-hdfs rrati/hadoop-namenode
</code></pre>
<p>This will mount the directory pointed to by <persistent_storage_dir> in
the container at /var/cache/hadoop-hdfs. The storage directory will need to be
writable by the user running the daemon in the container. In the case of the
namenode, the daemon is run by the user hdfs.</p>
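<p>As a rough sketch, preparing a storage directory on the host might look like
the following. The path is arbitrary, and this assumes an hdfs user exists on
the host with a uid matching the one inside the container; otherwise chown to
the matching numeric uid instead:</p>
<pre><code>mkdir -p /srv/hadoop/namenode
chown hdfs:hdfs /srv/hadoop/namenode
</code></pre>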
<h2>Submitting a Job</h2>
<p>We’re about ready to submit a job into the docker cluster we started. First
we need to setup our host machine to talk to the hadoop cluster. This is
pretty simple and there are a few ways to do it. Since I didn’t expose the
appropriate ports when I started the namenode and resourcemanager I will use
the hostnames/IPs of the running containers. I could have exposed the required
ports when I started the containers and pointed the hadoop configuration files
at localhost:<port>, but for this example I elected not to do that.</p>
<p>First you need to install some hadoop pieces on the host system:</p>
<pre><code>yum install hadoop-common hadoop-yarn
</code></pre>
<p>Then you’ll need to modify /etc/hadoop/core-site.xml to point at the namenode.
Replace the existing property definition with the following:</p>
<pre><code><property>
<name>fs.default.name</name>
<value>hdfs://namenode:8020</value>
</property>
</code></pre>
<p>For simplicity I use the hostnames I set up in my DNS server so I only have one
location I have to deal with if IPs change. You also have to make sure to add
the dnsmasq server to the list of DNS servers in /etc/resolv.conf if you do it
this way. Using straight IPs works fine as well.</p>
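<p>Adding the dnsmasq server to the host resolver, assuming the default bridge
address from my earlier examples, is just:</p>
<pre><code>echo "nameserver 172.17.0.1" >> /etc/resolv.conf
</code></pre>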
<p>Next you’ll need to add this to /etc/hadoop/yarn-site.xml:</p>
<pre><code><property>
<name>yarn.resourcemanager.hostname</name>
<value>resourcemgr</value>
</property>
</code></pre>
<p>Again I’m using the hostname defined in the dnsmasq server. Once you make
those two changes you can submit a job to your hadoop cluster running in your
containers.</p>
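<p>As a quick smoke test, something like the following should run against the
containerized cluster. The path to the examples jar below is a guess; use
whatever your hadoop installation provides:</p>
<pre><code>hadoop fs -ls /
hadoop jar /usr/share/hadoop/mapreduce/hadoop-mapreduce-examples.jar pi 2 100
</code></pre>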
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2014/05/06/apache-hadoop-plus-docker-plus-fedora-building-images/">Apache Hadoop + Docker + Fedora: Building Images</a></h1>
<p class="meta">
<time datetime="2014-05-06T09:50:10-04:00" pubdate data-updated="true">May 6<span>th</span>, 2014</time>
</p>
</header>
<div class="entry-content"><p>Getting Apache Hadoop running in docker presents some interesting challenges.
I’ll be discussing some of the challenges as well as limitations in a later
post. In this post I’ll go through the basics of getting docker running on
Fedora and generating images with hadoop pre-installed and configured.</p>
<h2>Docker Setup</h2>
<p>I use Fedora for my host system when running docker images, and luckily docker
has been a part of Fedora since Fedora 19. First you need to install the
docker-io packages:</p>
<pre><code>yum install docker-io
</code></pre>
<p>Then you need to start docker:</p>
<pre><code>systemctl start docker
</code></pre>
<p>And that’s it. Docker is now running on your Fedora host and it’s ready to
download or generate images. If you want docker to start on system boot then
you’ll need to enable it:</p>
<pre><code>systemctl enable docker
</code></pre>
<h2>Generating Hadoop Images</h2>
<p>Scott Collier has done a great job providing docker configurations for a
number of different use cases, and his hadoop docker configuration provides
an easy way to generate docker images with hadoop installed and configured.
Scott’s hadoop docker configuration files can be found <a href="https://github.com/scollier/Fedora-Dockerfiles/tree/master/hadoop">here</a>. There are 2 paths you can choose:</p>
<ul>
<li>All of hadoop running in a single container (single_container)</li>
<li>Hadoop split into multiple containers (multi_container)</li>
</ul>
<p>The images built from the files in these directories will contain the latest
version of hadoop in the Fedora repositories. At the time of this writing
that is hadoop 2.2.0 running on Fedora 20. I’ll be using the images generated
from the multi_container directory because I find them more interesting and
they’re closer to what a real hadoop deployment would be like.</p>
<p>Inside the multi_container directory you’ll find directories for the different
images as well as README files that explain how to build the image.</p>
<h2>A Brief Overview of a Dockerfile</h2>
<p>The Dockerfile in each directory controls how the docker image is generated.
For these images each Dockerfile inherits from the fedora docker image,
updates existing packages, and installs all the bits hadoop needs. Then
some customized configuration/scripts are added to the image, and some ports
are exposed for networking. Finally the images will launch an init-type
service. Currently the images use supervisord to launch and monitor the hadoop
processes for the image, and which daemons will be started and how they will be
managed is controlled by the supervisord configuration file. There is some
work to allow systemd to run inside a container, so it’s possible that later
revisions of the Dockerfiles could use systemd instead.</p>
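<p>To make that concrete, here is a stripped-down sketch of what such a Dockerfile
looks like. This is illustrative only, not Scott’s actual file; the package
names, file names, and ports are approximations:</p>
<pre><code>FROM fedora
RUN yum -y update && yum clean all
RUN yum -y install hadoop-hdfs supervisor && yum clean all
ADD hdfs-site.xml /etc/hadoop/hdfs-site.xml
ADD supervisord.conf /etc/supervisord.conf
EXPOSE 50070 8020
CMD ["/usr/bin/supervisord", "-n"]
</code></pre>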
<p>The hadoop configuration in this setup is as simple as possible. There is
no secure deployment, HA, mapreduce history server, etc. Some additional
processes are stubbed out in the supervisord configuration files but are not
enabled. For anything beyond a simple deployment, like HA or secure, you
will need to modify the hadoop configuration files added to the image as
well as the docker and supervisord configuration files.</p>
<h2>Building an Image</h2>
<p>Now that we have a general idea of what will happen, let’s build an image.
Each image is built roughly the same way. First go into the directory for
the image you want to generate and execute a variant of:</p>
<pre><code>docker build -rm -t <username>/<image_name> .
</code></pre>
<p>You can name the images anything you like. I usually name them in the form
hadoop-<function>, so to generate the namenode with this naming convention I
would execute:</p>
<pre><code>docker build -rm -t rrati/hadoop-namenode .
</code></pre>
<p>Docker will head off and build the image for me. It can take quite some time
for the image generation to complete, but when it’s done you should be able
to see your image by executing:</p>
<pre><code>docker images
</code></pre>
<p>If the machine you are building these images on is running docker as a user
other than your account then you will probably need to execute the above
commands as the user running docker. On Fedora 20, the system docker instance
is running as the root user so I prepend sudo to all of my docker commands.</p>
<p>If you do these steps for each directory you should end up with 3 images in
docker and you’re ready to start them up.</p>
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2012/10/18/using-cluster-suites-gui-to-configure-high-availability-schedulers/">Using Cluster Suite’s GUI to Configure High Availability Schedulers</a></h1>
<p class="meta">
<time datetime="2012-10-18T13:20:00-04:00" pubdate data-updated="true">Oct 18<span>th</span>, 2012</time>
</p>
</header>
<div class="entry-content"><p>In an <a href="http://rrati.github.com/blog/2012/09/26/using-cluster-suite-to-manage-a-high-availability-scheduler/">earlier post</a> I talked about using Cluster Suite
to manage high availability schedulers and referenced the command line tools
available to perform the configuration. I’d like to focus on using the GUI that
is part of Cluster Suite to configure an HA schedd. It’s a pretty simple
process, but it does require that you run a wallaby shell command to complete the
configuration.</p>
<p>The first thing you need to do is create or import your cluster in the GUI.
If you already have a cluster in the GUI then make sure the nodes you want to
be part of an HA schedd configuration are part of the cluster.</p>
<p>The next step is to create a restricted Failover Domain. Nodes in this domain
will run the schedd service you create, and making it restricted ensures that
no nodes outside the Failover Domain will run the service. If a node in the
Failover Domain isn’t available then the service won’t run.</p>
<p>The third step is to create a service that will comprise your schedd. Make
sure that the relocation policy on the service is Relocate and that it is
configured to use whatever Failover Domain you have already set up. The
service will contain 2 resources in a parent-child configuration. The parent
resource is the NFS mount and the child is a condor instance resource.
This is what establishes the dependency that the NFS mount is required for
the condor instance to run. When the resources are configured like this it
means the parent must be functioning for the child to operate.</p>
<p>Finally, you need to sync the cluster configuration with wallaby. This is
easily accomplished by logging into a machine in the cluster and running:</p>
<pre><code>wallaby cluster-sync-to-store
</code></pre>
<p>That wallaby shell command will inspect the cluster configuration and
configure wallaby to match it. It can handle any number of schedd
configurations so you don’t need to run it once per setup. However, until
the cluster-sync-to-store command is executed, the schedd service you created
can’t and won’t run.</p>
<p>Start your service or wait for Cluster Suite to do it for you and you’ll find
an HA schedd in your pool.</p>
<p>You can get a video of the process as <a href="http://rrati.fedorapeople.org/videos/cs_gui_schedd.ogv">ogv</a> or <a href="http://rrati.fedorapeople.org/videos/cs_gui_schedd.mp4">mp4</a> if the inline video doesn’t work.</p>
<p><video width='800' height='600' preload='none' controls poster=''><source src='http://rrati.fedorapeople.org/videos/cs_gui_schedd.mp4' type='video/mp4; codecs="avc1.42E01E, mp4a.40.2"'></video></p>
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2012/09/26/using-cluster-suite-to-manage-a-high-availability-scheduler/">Using Cluster Suite to Manage a High Availability Scheduler</a></h1>
<p class="meta">
<time datetime="2012-09-26T15:53:00-04:00" pubdate data-updated="true">Sep 26<span>th</span>, 2012</time>
</p>
</header>
<div class="entry-content"><p>Condor provides simple and easy to configure HA functionality for the schedd
that relies upon shared storage (usually NFS). The shared store is used to
store the job queue log and coordinate which node is running the schedd. This
means that each node that can run a particular schedd must not only have condor
configured but must also be configured to access the shared storage.</p>
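<p>For reference, condor’s native HA schedd setup boils down to a handful of
configuration knobs pointing at that shared spool. A minimal sketch, with a
placeholder mount point:</p>
<pre><code>MASTER_HA_LIST = SCHEDD
SPOOL = /shared/spool
HA_LOCK_URL = file:/shared/spool
VALID_SPOOL_FILES = $(VALID_SPOOL_FILES) SCHEDD.lock
</code></pre>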
<p>For most people condor’s native HA management of the schedd is probably
enough. However, using Cluster Suite to manage the schedd provides some
additional control and protects against job queue corruption that can occur
in rare instances due to issues with the shared storage mechanism.</p>
<p>Condor even provides all the tools necessary to hook into Cluster Suite,
including a new set of commands for the wallaby shell that make configuration
and management tasks as easy as a single command. While a functioning wallaby
setup isn’t required to work with Cluster Suite, I would highly recommend
using it. The wallaby shell commands greatly simplify configuring both
Cluster Suite and condor nodes (through wallaby).</p>
<p>There are two tools that condor provides for integrating with Cluster
Suite. One is the set of wallaby shell commands I already mentioned. The
other is a Resource Agent for condor, which gives Cluster Suite control over
the schedd.</p>
<p>With the above pieces in place and a fully functional wallaby setup,
configuration of a schedd is as simple as:</p>
<pre><code>wallaby cluster-create name=<name> spool=<spool> server=<server> export=<export> node1 node2 ...
</code></pre>
<p>With that single command, the wallaby shell will configure Cluster
Suite to run an HA schedd on the list of nodes provided. It will also
configure those same nodes in wallaby to run an HA schedd. Seems nice, but
what are the advantages? Plenty.</p>
<p>You gain a lot of control over which node is running the schedd. With
condor’s native mechanism, it’s pot luck which node will run the schedd. All
nodes point to the same shared storage and whoever gets there first will run
the schedd. Every time. If a specific node is having problems that cause
the schedd to crash, it could continually win the race to run the schedd
leaving your highly available schedd not very available.</p>
<p>Cluster Suite doesn’t rely upon the shared storage to determine which node
is going to run the schedd. It has a set of tools, including a GUI, that
allow you to move a schedd from one node to another at any time. In addition
to that, you can specify parameters that control when Cluster Suite will
decide to move the schedd to another node instead of restarting it on the
same machine. For example, I can tell Cluster Suite to move the schedd to
another machine if it restarts 3 times in 60 seconds.</p>
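<p>In rgmanager terms that policy lives on the service definition itself. The
fragment below is only illustrative; the wallaby shell command generates the
real configuration for you, and the service and domain names here are made up:</p>
<pre><code><service name="schedd1" domain="schedd-domain" recovery="restart"
         max_restarts="3" restart_expire_time="60">
  <!-- NFS mount and condor instance resources nest here -->
</service>
</code></pre>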
<p>Cluster Suite also manages the shared storage. I don’t have to configure
each node to mount the shared storage at the same mount point and ensure it
will be mounted at boot. Cluster Suite creates the mount point on the machine
and mounts the shared storage when it starts the schedd. This means the
shared store is only mounted on the node running the schedd, which removes
the job queue corruption that can occur if 2 HA schedds run at the same time
on 2 different machines.</p>
<p>Having Cluster Suite manage the shared storage for an HA schedd provides
another benefit as well. Access to the shared storage becomes required for
the schedd to run. If there is an interruption in accessing the shared
storage on a node running the schedd, Cluster Suite will shut down the schedd
and start it on another node. This means no more split brain.</p>
<p>Are there any downsides to using Cluster Suite to manage my schedds? Not many,
actually. Obviously you need to have Cluster Suite installed on each node
that will be part of an HA schedd configuration, so there’s an additional
disk space/memory requirement. The biggest issue I’ve found is that since
the condor_master will not be managing the schedds, none of the daemon
management commands will work (i.e. condor_on|off|restart, etc.). Instead you
would need to use Cluster Suite’s tools for those tasks.</p>
<p>You will also have to set up fencing in Cluster Suite for everything to work
correctly, which might mean new hardware if you don’t have a remotely
manageable power setup. If Cluster Suite can’t fence a node when it
determines it needs to, it will shut down the service completely to avoid
corruption. A way to handle this if you don’t have the power setup is to
use virtual machines for your schedd nodes. Cluster Suite has a means to do
fencing without needing an external power management setup for virtual machines.</p>
</div>
</article>
<article>
<header>
<h1 class="entry-title"><a href="/blog/2012/09/18/putting-it-together/">Putting It Together</a></h1>
<p class="meta">
<time datetime="2012-09-18T08:59:00-04:00" pubdate data-updated="true">Sep 18<span>th</span>, 2012</time>
</p>
</header>
<div class="entry-content"><p>Condor already provides the ability to integrate with numerous computing
resources, and I will be discussing ways for it to do so with other bits
and pieces to enhance existing or provide new functionality.</p>
</div>
</article>
<div class="pagination">
<a href="/blog/archives">Blog Archives</a>
</div>
</div>
<aside class="sidebar">
<section>
<h1>Recent Posts</h1>
<ul id="recent_posts">
<li class="post">
<a href="/blog/2014/05/09/apache-hadoop-plus-docker-plus-fedora-issues-and-limitations/">Apache Hadoop + docker + Fedora: Issues and Limitations</a>
</li>
<li class="post">
<a href="/blog/2014/05/08/apache-hadoop-plus-docker-plus-fedora-why-it-works/">Apache Hadoop + docker + Fedora: Why it Works</a>
</li>
<li class="post">
<a href="/blog/2014/05/07/apache-hadoop-plus-docker-plus-fedora-running-images/">Apache Hadoop + docker + Fedora: Running Images</a>
</li>
<li class="post">
<a href="/blog/2014/05/06/apache-hadoop-plus-docker-plus-fedora-building-images/">Apache Hadoop + docker + Fedora: Building Images</a>
</li>
<li class="post">
<a href="/blog/2012/10/18/using-cluster-suites-gui-to-configure-high-availability-schedulers/">Using Cluster Suite’s GUI to configure High Availability Schedulers </a>
</li>
</ul>
</section>
<section>
<h1>GitHub Repos</h1>
<ul id="gh_repos">
<li class="loading">Status updating…</li>
</ul>
<a href="https://github.com/rrati">@rrati</a> on GitHub
<script type="text/javascript">
$.domReady(function(){
if (!window.jXHR){
var jxhr = document.createElement('script');
jxhr.type = 'text/javascript';
jxhr.src = '/javascripts/libs/jXHR.js';
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(jxhr, s);
}
github.showRepos({
user: 'rrati',
count: 0,
skip_forks: true,
target: '#gh_repos'
});
});
</script>
<script src="/javascripts/github.js" type="text/javascript"> </script>
</section>
</aside>
</div>
</div>
<footer role="contentinfo"><p>
Copyright © 2014 - Robert Rati -
<span class="credit">Powered by <a href="http://octopress.org">Octopress</a></span>
</p>
</footer>
<script type="text/javascript">
(function(){
var twitterWidgets = document.createElement('script');
twitterWidgets.type = 'text/javascript';
twitterWidgets.async = true;
twitterWidgets.src = 'http://platform.twitter.com/widgets.js';
document.getElementsByTagName('head')[0].appendChild(twitterWidgets);
})();
</script>
</body>
</html>