<!-- ManuallyInstallingPivotalHD2.0Stack.html -->


<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">

  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible">

  <!-- REPLACE X WITH PRODUCT NAME -->
  <title>Manually Installing Pivotal HD 2.0 Stack | Pivotal Docs</title>
    <!-- Local CSS stylesheets -->
    <link href="/stylesheets/master.css" media="screen,print" rel="stylesheet" type="text/css" />
    <link href="/stylesheets/breadcrumbs.css" media="screen,print" rel="stylesheet" type="text/css" />
    <link href="/stylesheets/search.css" media="screen,print" rel="stylesheet" type="text/css" />
    <link href="/stylesheets/portal-style.css" media="screen,print" rel="stylesheet" type="text/css" />
    <link href="/stylesheets/printable.css" media="print" rel="stylesheet" type="text/css" /> 
    <!-- Confluence HTML stylesheet -->
    <link href="/stylesheets/site-conf.css" media="screen,print" rel="stylesheet"  type="text/css" /> 
    <!-- Left-navigation code -->
    <!-- http://www.designchemical.com/lab/jquery-vertical-accordion-menu-plugin/examples/# -->
    <link href="/stylesheets/dcaccordion.css" rel="stylesheet" type="text/css" />
    <!-- Loaded over HTTPS so the library is not blocked as mixed content when this page is served securely -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js" type="text/javascript"></script>
    <script src="/javascripts/jquery.cookie.js" type="text/javascript"></script>
    <script src="/javascripts/jquery.hoverIntent.minified.js" type="text/javascript"></script>
    <script src="/javascripts/jquery.dcjqaccordion.2.7.min.js" type="text/javascript"></script>
    <script type="text/javascript">
                    // Set up the left-navigation accordion (#accordion-1) once the DOM is ready.
                    // The options object is handed to the dcAccordion plugin unchanged.
                    $(function ($) {
                      var accordionOptions = {
                        eventType: 'click',
                        autoClose: true,
                        saveState: true,
                        disableLink: false,
                        speed: 'fast',
                        classActive: 'test',
                        showCount: false
                      };
                      $('#accordion-1').dcAccordion(accordionOptions);
                    });
        </script>
    <link href="/stylesheets/grey.css" rel="stylesheet" type="text/css" /> 
    <!-- End left-navigation code -->
    <script src="/javascripts/all.js" type="text/javascript"></script>
    <link href='http://www.gopivotal.com/misc/favicon.ico' rel='shortcut icon'>
    <script type="text/javascript">
    // Classic Google Analytics (ga.js) tracking; only runs on the docs.gopivotal.com host.
    if (window.location.host === 'docs.gopivotal.com') {
        // Reuse an existing command queue if one is already defined, then queue the commands.
        var _gaq = _gaq || [];
        _gaq.push(['_setAccount', 'UA-39702075-1']);
        _gaq.push(['_setDomainName', 'gopivotal.com']);
        _gaq.push(['_trackPageview']);

        // Insert an async ga.js script element ahead of the first script element in the page.
        (function () {
          var isSecure = 'https:' == document.location.protocol;
          var tracker = document.createElement('script');
          tracker.type = 'text/javascript';
          tracker.async = true;
          tracker.src = (isSecure ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
          var firstScript = document.getElementsByTagName('script')[0];
          firstScript.parentNode.insertBefore(tracker, firstScript);
        })();
    }
  </script>
</head>

<body class="pivotalcf pivotalcf_getstarted pivotalcf_getstarted_index">
  <div class="viewport">
    <div class="mobile-navigation--wrapper mobile-only">
      <div class="navigation-drawer--container">
        <div class="navigation-item-list">
          <div class="navbar-link active">
            <a href="http://gopivotal.com">
              Home
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/paas">
              PaaS
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/big-data">
              Big Data
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/agile">
              Agile
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/support">
              Help &amp; Support
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/products">
              Products
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/solutions">
              Solutions
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
          <div class="navbar-link">
            <a href="http://gopivotal.com/partners">
              Partners
              <i class="icon-chevron-right pull-right"></i>
            </a>
          </div>
        </div>
      </div>
      <div class="mobile-nav">
        <div class="nav-icon js-open-nav-drawer">
          <i class="icon-reorder"></i>
        </div>
        <div class="header-center-icon">
          <a href="http://gopivotal.com">
            <div class="icon icon-pivotal-logo-mobile"></div>
          </a>
        </div>
      </div>
    </div>

    <div class='wrap'>
      <script src="//use.typekit.net/clb0qji.js" type="text/javascript"></script>
      <script type="text/javascript">
          // Start Typekit font loading. Any error (for example Typekit being
          // undefined) is deliberately swallowed.
          try { Typekit.load(); } catch (ignored) {}
      </script>
      <script type="text/javascript">
          // Set document.domain to the parent domain.
          // NOTE(review): presumably this lets pages/frames on other gopivotal.com
          // subdomains script this page — confirm whether it is still needed.
          document.domain = "gopivotal.com";
      </script>

	<script type="text/javascript">
	  // Font families requested through the Google Web Font Loader.
	  WebFontConfig = {
	    google: {
	      families: ['Source+Sans+Pro:300italic,400italic,600italic,300,400,600:latin']
	    }
	  };

	  // Inject webfont.js ahead of the first script element, matching the page's protocol.
	  (function () {
	    var scheme = 'https:' == document.location.protocol ? 'https' : 'http';
	    var loader = document.createElement('script');
	    loader.src = scheme + '://ajax.googleapis.com/ajax/libs/webfont/1/webfont.js';
	    loader.type = 'text/javascript';
	    loader.async = 'true';
	    var firstScript = document.getElementsByTagName('script')[0];
	    firstScript.parentNode.insertBefore(loader, firstScript);
	  })();
	</script>

      <div id="search-dropdown-box">
        <div class="search-dropdown--container js-search-dropdown">
          <div class="container-fluid">
            <!-- Functional image: alt text gives the close control an accessible name -->
            <div class="close-menu-large"><img src="http://www.gopivotal.com/sites/all/themes/gopo13/images/icon-close.png" alt="Close search" /></div>
            <div class="search-form--container">
              <div class="form-search">
                <div class='gcse-search'></div>
                <!-- Loaded over HTTPS so the search loader is not blocked as mixed content on secure pages -->
                <script src="https://www.google.com/jsapi" type="text/javascript"></script>
                <script src="/javascripts/cse.js" type="text/javascript"></script>
              </div>
            </div>
          </div>
        </div>
      </div>

      <header class="navbar desktop-only" id="nav">
        <div class="navbar-inner">
            <div class="container-fluid">
                <div class="pivotal-logo--container">
                    <a class="pivotal-logo" href="http://gopivotal.com"><span></span></a>
                </div>

                <ul class="nav pull-right">
                    <li class="navbar-link">
                        <a href="http://www.gopivotal.com/paas" id="paas-nav-link">PaaS</a>
                    </li>
                    <li class="navbar-link">
                        <a href="http://www.gopivotal.com/big-data" id="big-data-nav-link">BIG DATA</a>
                    </li>
                    <li class="navbar-link">
                        <a href="http://www.gopivotal.com/agile" id="agile-nav-link">AGILE</a>
                    </li>
                    <li class="navbar-link">
                        <a href="http://www.gopivotal.com/oss" id="oss-nav-link">OSS</a>
                    </li>
                    <li class="nav-search">
                        <a class="js-search-input-open" id="click-to-search"><span></span></a>
                    </li>
                </ul>
            </div>
            <!-- The image is the link's only content, so its alt text is the link's accessible name -->
            <a href="http://www.gopivotal.com/contact">
                <img id="get-started" src="http://www.gopivotal.com/sites/all/themes/gopo13/images/get-started.png" alt="Get started">
            </a>
        </div>
      </header>
      <div class="main-wrap">
        <div class="container-fluid">

          <!-- Google CSE Search Box -->
          <div id='docs-search'>
              <gcse:search></gcse:search>
          </div>
          
          <div id='all-docs-link'>
            <a href="http://docs.gopivotal.com/">All Documentation</a>
          </div>
          
          <div class="container">
            <div id="sub-nav" class="nav-container">              
              
              <!-- Collapsible left-navigation-->
				<ul class="accordion"  id="accordion-1">
					<!-- REPLACE <li/> NODES-->
                        <li>
                <a href="index.html">Home</a><br>
                        </li>
                        <li>
                <a href="PivotalHD.html">Pivotal HD 2.0.1</a>

                            <ul>
                    <li>
                <a href="PHDEnterprise2.0.1ReleaseNotes.html">PHD Enterprise 2.0.1 Release Notes</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PHDInstallationandAdministration.html">PHD Installation and Administration</a>

                            <ul>
                    <li>
                <a href="OverviewofPHD.html">Overview of PHD</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="InstallationOverview.html">Installation Overview</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PHDInstallationChecklist.html">PHD Installation Checklist</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="InstallingPHDUsingtheCLI.html">Installing PHD Using the CLI</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="UpgradeChecklist.html">Upgrade Checklist</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="UpgradingPHDUsingtheCLI.html">Upgrading PHD Using the CLI</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="AdministeringPHDUsingtheCLI.html">Administering PHD Using the CLI</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PHDFAQFrequentlyAskedQuestions.html">PHD FAQ (Frequently Asked Questions)</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PHDTroubleshooting.html">PHD Troubleshooting</a>

                    </li>
            </ul>
            </li>
            </ul>
                    <ul>
                    <li>
                <a href="StackandToolsReference.html">Stack and Tools Reference</a>

                            <ul>
                    <li>
                <a href="OverviewofApacheStackandPivotalComponents.html">Overview of Apache Stack and Pivotal Components</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ManuallyInstallingPivotalHD2.0Stack.html">Manually Installing Pivotal HD 2.0 Stack</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ManuallyUpgradingPivotalHDStackfrom1.1.1to2.0.html">Manually Upgrading Pivotal HD Stack from 1.1.1 to 2.0</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PivotalHadoopEnhancements.html">Pivotal Hadoop Enhancements</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="Security.html">Security</a>

                    </li>
            </ul>
            </li>
            </ul>
            </li>
                        <li>
                <a href="PivotalCommandCenter.html">Pivotal Command Center 2.2.1</a>

                            <ul>
                    <li>
                <a href="PCC2.2.1ReleaseNotes.html">PCC 2.2.1 Release Notes</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PCCUserGuide.html">PCC User Guide</a>

                            <ul>
                    <li>
                <a href="PCCOverview.html">PCC Overview</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PCCInstallationChecklist.html">PCC Installation Checklist</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="InstallingPCC.html">Installing PCC</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="UsingPCC.html">Using PCC</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="CreatingaYUMEPELRepository.html">Creating a YUM EPEL Repository</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="CommandLineReference.html">Command Line Reference</a>

                    </li>
            </ul>
            </li>
            </ul>
            </li>
                        <li>
                <a href="PivotalHAWQ.html">Pivotal HAWQ 1.2.0</a>

                            <ul>
                    <li>
                <a href="HAWQ1.2.0.1ReleaseNotes.html">HAWQ 1.2.0.1 Release Notes</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQInstallationandUpgrade.html">HAWQ Installation and Upgrade</a>

                            <ul>
                    <li>
                <a href="PreparingtoInstallHAWQ.html">Preparing to Install HAWQ</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="InstallingHAWQ.html">Installing HAWQ</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="InstallingtheHAWQComponents.html">Installing the HAWQ Components</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="UpgradingHAWQandComponents.html">Upgrading HAWQ and Components</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQConfigurationParameterReference.html">HAWQ Configuration Parameter Reference</a>

                    </li>
            </ul>
            </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQAdministration.html">HAWQ Administration</a>

                            <ul>
                    <li>
                <a href="HAWQOverview.html">HAWQ Overview</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQQueryProcessing.html">HAWQ Query Processing</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="UsingHAWQtoQueryData.html">Using HAWQ to Query Data</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ConfiguringClientAuthentication.html">Configuring Client Authentication</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="KerberosAuthentication.html">Kerberos Authentication</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ExpandingtheHAWQSystem.html">Expanding the HAWQ System</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQInputFormatforMapReduce.html">HAWQ InputFormat for MapReduce</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQFilespacesandHighAvailabilityEnabledHDFS.html">HAWQ Filespaces and High Availability Enabled HDFS</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="SQLCommandReference.html">SQL Command Reference</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ManagementUtilityReference.html">Management Utility Reference</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="ClientUtilityReference.html">Client Utility Reference</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQServerConfigurationParameters.html">HAWQ Server Configuration Parameters</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQEnvironmentVariables.html">HAWQ Environment Variables</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="HAWQDataTypes.html">HAWQ Data Types</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="SystemCatalogReference.html">System Catalog Reference</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="hawq_toolkitReference.html">hawq_toolkit Reference</a>

                    </li>
            </ul>
            </li>
            </ul>
                    <ul>
                    <li>
                <a href="PivotalExtensionFrameworkPXF.html">Pivotal Extension Framework (PXF)</a>

                            <ul>
                    <li>
                <a href="PXFInstallationandAdministration.html">PXF Installation and Administration</a>

                    </li>
            </ul>
                    <ul>
                    <li>
                <a href="PXFExternalTableandAPIReference.html">PXF External Table and API Reference</a>

                    </li>
            </ul>
            </li>
            </ul>
            </li>
                </ul>
            </div><!--end of sub-nav-->
            
            <h3 class="title-container">Manually Installing Pivotal HD 2.0 Stack</h3>
            <div class="content">
              <!-- Python script replaces main content -->
			  <div id ="main"><div style="visibility:hidden; height:2px;">Pivotal Product Documentation : Manually Installing Pivotal HD 2.0 Stack</div><div class="wiki-content group" id="main-content">
<p>This section describes how to manually install all the components included with Pivotal HD 2.0.</p><p><style type="text/css">/*<![CDATA[*/
div.rbtoc1400035785336 {padding: 0px;}
div.rbtoc1400035785336 ul {list-style: disc;margin-left: 0px;}
div.rbtoc1400035785336 li {margin-left: 0px;padding-left: 0px;}

/*]]>*/</style><div class="toc-macro rbtoc1400035785336">
<ul class="toc-indentation">
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-DistributionContents">Distribution Contents</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-AccessingPHD2.0">Accessing PHD 2.0</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-HadoopHDFS">Hadoop HDFS</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-HadoopYARN">Hadoop YARN</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-HadoopPseudo-distributedConfiguration">Hadoop Pseudo-distributed Configuration</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Zookeeper">Zookeeper</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-HBase">HBase</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Hive">Hive</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Hcatalog">Hcatalog</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Pig">Pig</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Mahout">Mahout</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Flume">Flume</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Sqoop">Sqoop</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Oozie">Oozie</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-Hamster">Hamster</a></li>
<li><a href="#ManuallyInstallingPivotalHD2.0Stack-GraphLab">GraphLab</a></li>
</ul>
</div></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-DistributionContents">Distribution Contents</h2><p>Pivotal HD 2.0 is a full Apache Hadoop distribution with Pivotal add-ons and a native integration with the Pivotal Greenplum database.</p><p>The RPM distribution of PHD 2.0 contains the following:</p><ul><li><strong>Hadoop 2.2.0</strong></li><li><strong>Pig 0.12.0</strong></li><li><strong>Zookeeper 3.4.5</strong></li><li><strong>HBase 0.96.0</strong></li><li><strong>Hive 0.12.0</strong></li><li><strong>Hcatalog 0.12.0</strong></li><li><strong>Mahout 0.7</strong></li><li><strong>Flume 1.4.0</strong></li><li><strong>Sqoop 1.4.2</strong></li><li><strong>Oozie 4.0.0</strong></li><li><strong>Hamster 1.0</strong></li><li><strong>GraphLab 2.2</strong></li></ul><h2 id="ManuallyInstallingPivotalHD2.0Stack-AccessingPHD2.0">Accessing PHD 2.0</h2><p>Download and extract the PHD package to your working directory:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$&gt;  tar zxvf PHD-2.0.1.0-&lt;nn&gt;.tar.gz
$&gt;  ls -p PHD-2.0.1.0-&lt;nn&gt;
flume/     hadoop/   hbase/     hive/    oozie/                        pig/    utility/
graphlab/  hamster/  hcatalog/  mahout/  open_source_licenses_PHD.txt  sqoop/  zookeeper/</pre>
</div></div><p>We define the replaced string, which we use in the following sections for each component, as follows:</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><th class="confluenceTh">Component</th><th class="confluenceTh" style="text-align: center;">PHD Version</th><th class="confluenceTh" style="text-align: center;">Replaced String</th></tr><tr><td class="confluenceTd">Hadoop</td><td class="confluenceTd"><code>2.2.0_gphd_3_0_1_0</code></td><td class="confluenceTd"><code>&lt;PHD_HADOOP_VERSION&gt;</code></td></tr><tr><td class="confluenceTd">HBase</td><td class="confluenceTd"><code>0.96.0_gphd_3_0_1_0</code></td><td class="confluenceTd"><code>&lt;PHD_HBASE_VERSION&gt;</code></td></tr><tr><td class="confluenceTd">Hive</td><td class="confluenceTd"><code>0.12.0_gphd_3_0_1_0</code></td><td class="confluenceTd"><code>&lt;PHD_HIVE_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">Pig</td><td class="confluenceTd" colspan="1"><code>0.12.0_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_PIG_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">Mahout</td><td class="confluenceTd" colspan="1"><code>0.7_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_MAHOUT_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">HCatalog</td><td class="confluenceTd" colspan="1"><code>0.12.0_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_HCATALOG_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">Sqoop</td><td class="confluenceTd" colspan="1"><code>1.4.2_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_SQOOP_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">Flume</td><td class="confluenceTd" colspan="1"><code>1.4.0_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_FLUME_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" 
colspan="1">Zookeeper</td><td class="confluenceTd" colspan="1"><code>3.4.5_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_ZOOKEEPER_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">Oozie</td><td class="confluenceTd" colspan="1"><code>4.0.0_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_OOZIE_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">bigtop-jsvc</td><td class="confluenceTd" colspan="1"><code>1.0.15_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_BIGTOP_JSVC_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">bigtop-utils</td><td class="confluenceTd" colspan="1"><code>0.4.0_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code>&lt;PHD_BIGTOP_UTILS_VERSION&gt;</code></td></tr><tr><td class="confluenceTd" colspan="1">bigtop-tomcat</td><td class="confluenceTd" colspan="1"><code>6.0.36_gphd_3_0_1_0</code></td><td class="confluenceTd" colspan="1"><code> &lt;PHD_BIGTOP_TOMCAT_VERSION&gt; </code></td></tr></tbody></table></div><p>The following sections describe how to manually install Pivotal HD 2.0.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisities">Prerequisites</h3><ul><li>Oracle Java Development Kit (JDK) 1.7 must be installed on every machine before installing any of the Hadoop components.</li><li>You must ensure that time synchronization and DNS are functioning correctly on all client and server machines. For example, run the following command to sync the time with an NTP server:</li></ul><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$&gt; service ntpd stop; ntpdate &lt;NTP server IP address&gt;; service ntpd start</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-InstallationNotes">Installation Notes</h3><p>In this section, we install packages by running the following command:</p><p style="margin-left: 30.0px;"><code>rpm -ivh &lt;package_name&gt;-&lt;version&gt;-&lt;nn&gt;.rpm</code></p><p>Where:</p><p style="margin-left: 30.0px;"><code>&lt;version&gt;</code> is the PHD version</p><p style="margin-left: 30.0px;"><code>&lt;nn&gt;</code> is the build number</p><h2 id="ManuallyInstallingPivotalHD2.0Stack-HadoopHDFS">Hadoop HDFS</h2><p>This section provides instructions for installing each of the following core Hadoop RPMs:</p><ul><li>HDFS Namenode Setup</li><li>HDFS Datanode Setup</li><li>HDFS Secondary Namenode Setup</li><li>HDFS NFS Gateway Setup</li></ul><h3 id="ManuallyInstallingPivotalHD2.0Stack-HadoopHDFSRPMPackages">Hadoop HDFS RPM Packages</h3><p>Pivotal provides the following RPMs as part of this release. The core packages provide all executables, libraries, configurations, and documentation for Hadoop and are required on every node in the Hadoop cluster as well as on the client workstation that will access the Hadoop service. 
The daemon packages provide a convenient way to manage Hadoop HDFS daemons as Linux services, which rely on the core package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>bigtop-utils, zookeeper-core</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>The Hadoop core package provides the common files for running Hadoop.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the Hadoop cluster and the client workstation that will access the Hadoop service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop, bigtop-jsvc</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>The Hadoop HDFS core package provides the common files for running HDFS.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the HDFS cluster and the client workstation that will access the HDFS.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> 
<strong>hadoop-hdfs-namenode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop Namenode, which provides a convenient method to manage Namenode start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Only on HDFS Namenode server.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-datanode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop Datanode, which provides a convenient method to manage datanode start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all HDFS Datanodes.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-secondarynamenode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td 
class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop SecondaryNamenode, which provides a convenient method to manage SecondaryNamenode start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on one server that will be acting as the Secondary Namenode.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-journalnode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop JournalNode, which provides a convenient method to manage journalnode start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all HDFS JournalNodes.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-nfs3-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon 
scripts package for Hadoop NFS gateway, which provides a convenient method to manage NFS gateway start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the node serving as the NFS server.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-portmap-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop portmap, which provides a convenient method to manage portmap start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the node serving as the NFS server.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-zkfc-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop zkfc, which provides a convenient method to manage zkfc start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all HDFS 
zkfc nodes.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-hdfs-fuse-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-libhdfs, hadoop-client</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Binaries that can be used to mount hdfs as a local directory.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the servers that want to mount the HDFS.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-libhdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Native implementation of the HDFS.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on servers that you want to run native hdfs.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-httpfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td 
class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>bigtop-tomcat, hadoop, hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>HttpFS is a server that provides a REST HTTP gateway supporting all HDFS File System operations (read and write).</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on servers that will be serving the REST HDFS service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-doc-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Doc</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hadoop documentation package.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-client-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Library</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop, hadoop-yarn, hadoop-mapreduce, hadoop-hdfs</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>A set of symbolic links that gathers the libraries for programming Hadoop and submitting Hadoop jobs.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Client nodes that will be used to submit Hadoop 
jobs.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites:CorePackageSetup">Prerequisites: Core Package Setup</h3><p>Perform the following steps on all the nodes in the Hadoop cluster and its client nodes:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-utils-&lt;PHD_BIGTOP_UTILS_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/zookeeper/rpm/zookeeper-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
</pre>
</div></div><p>Where <code>working_dir</code> is the directory where you want the rpms expanded.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HDFSNamenodeSetup">HDFS Namenode Setup</h3><p>Install the Hadoop Namenode package on the workstation that will serve as HDFS Namenode:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-jsvc-&lt;PHD_BIGTOP_JSVC_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-namenode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HDFSDatanodeSetup">HDFS Datanode Setup</h3><p>Install the Hadoop Datanode package on the workstation that will serve as the HDFS Datanode:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-jsvc-&lt;PHD_BIGTOP_JSVC_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-datanode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HDFSSecondaryNamenodeSetup">HDFS Secondary Namenode Setup</h3><p>Install the Hadoop Secondary Namenode package on the workstation that will serve as the HDFS Secondary Namenode:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-jsvc-&lt;PHD_BIGTOP_JSVC_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-secondarynamenode-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HDFSNFSGatewaySetup">HDFS NFS Gateway Setup</h3><p>Install the Hadoop NFS gateway and portmap package on the workstation that will serve as the HDFS NFS gateway and portmap:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-jsvc-&lt;PHD_BIGTOP_JSVC_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-nfs3-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-hdfs-portmap-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HDFSConfiguration">HDFS Configuration</h3><p>HDFS configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/hadoop/conf/</code></p><p>Refer to the Apache Hadoop documentation for how to configure HDFS in distributed mode.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage">Usage</h3><p>After installing the daemon package for Hadoop, you can start the daemons, as follows:</p><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingHDFS">Starting HDFS</h4><p>HDFS includes three main components: Namenode, Datanode, and Secondary Namenode.</p><p><strong>To start the Namenode daemon:</strong></p><p>Format the Namenode before starting it, as follows:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs namenode -format</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
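<p><strong>Note</strong>: You only have to do this once. However, if you have changed the Hadoop Namenode configuration, you may need to run this again. Be aware that formatting the Namenode discards any existing HDFS metadata, so do not reformat a Namenode whose data you need to keep.</p>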
</div>
</div>
<p>Then start the Namenode by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-namenode start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-namenode start</pre>
</div></div><p>When Namenode is started, you can visit its dashboard at: <code>http://localhost:50070/</code></p><p><strong>To start the Datanode daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-datanode start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-datanode start</pre>
</div></div><p><br/> <strong>To start the Secondary Namenode daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-secondarynamenode start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-secondarynamenode start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingNFSgateway">Starting NFS gateway</h4><p>Three daemons are required to provide NFS service: portmap (or rpcbind), mountd and nfsd. The NFS gateway has both mountd and nfsd.</p><p><strong>To start the portmap and NFS gateway daemons:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service rpcbind stop
$ sudo service hadoop-hdfs-portmap start
$ sudo service hadoop-hdfs-nfs3 start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/rpcbind stop
$ sudo /etc/init.d/hadoop-hdfs-portmap start
$ sudo /etc/init.d/hadoop-hdfs-nfs3 start</pre>
</div></div><p><strong>To mount the export "/":</strong></p><p>Make sure nfs-utils is installed on the client:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo yum install -y nfs-utils</pre>
</div></div><p>Then mount:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo mount -t nfs -o vers=3,proto=tcp,nolock &lt;nfsserver&gt;:/ &lt;mount_point&gt;</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-UsingHDFS">Using HDFS</h4><p>When the HDFS components are started, try some HDFS usage commands, for example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs dfs -ls /
$ sudo -u hdfs hdfs dfs -mkdir -p /user/hadoop
$ sudo -u hdfs hdfs dfs -chown -R hadoop:hadoop /user/hadoop
$ sudo -u hdfs hdfs dfs -copyFromLocal /etc/passwd /user/hadoop/</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>By default, the root folder is owned by user <code>hdfs</code>, so you have to use <code>sudo -u hdfs ***</code> to execute the first few commands.</p>
</div>
</div>
<h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingHDFS">Stopping HDFS</h4><p><strong>Stop the Namenode Daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-namenode stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-namenode stop</pre>
</div></div><p><br/> <strong>Stop the Datanode Daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-datanode stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-datanode stop</pre>
</div></div><p><strong>Stop the Secondary Namenode Daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-secondarynamenode stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-secondarynamenode stop</pre>
</div></div><p><strong>Stop the NFS gateway Daemon:</strong></p><p>Run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-hdfs-portmap stop
$ sudo service hadoop-hdfs-nfs3 stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-hdfs-portmap stop
$ sudo /etc/init.d/hadoop-hdfs-nfs3 stop</pre>
</div></div><h2 id="ManuallyInstallingPivotalHD2.0Stack-HadoopYARN">Hadoop YARN</h2><p>This section provides instructions for installing each of the following core Hadoop YARN RPMs:</p><ul><li>YARN ResourceManager Setup</li><li>YARN NodeManager Setup</li><li>Mapreduce HistoryServer Setup</li><li>YARN ProxyServer Setup</li></ul><h3 id="ManuallyInstallingPivotalHD2.0Stack-HadoopYARNRPMPackages">Hadoop YARN RPM Packages</h3><p>Pivotal provides the following RPMs as part of this release. The core packages provide all executables, libraries, configurations, and documentation for Hadoop and are required on every node in the Hadoop cluster, as well as on the client workstation that will access the Hadoop service. The daemon packages, which rely on the core packages, provide a convenient way to manage Hadoop YARN daemons as Linux services.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>bigtop-utils, zookeeper-core</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hadoop core packages provide the common core packages for running Hadoop.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the Hadoop cluster and the client workstation that will access the Hadoop service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-yarn-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td 
class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hadoop YARN core packages provides common files for running YARN.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all YARN nodes.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-yarn-resourcemanager-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-yarn</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop YARN ResourceManager, which provides a convenient method to manage ResourceManager start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the Resource Manager node.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-yarn-nodemanager-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-yarn</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop YARN NodeManager, which 
provides a convenient method to manage NodeManager start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all the Node Manager nodes.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-yarn-proxyserver-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-yarn</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop YARN ProxyServer, which provides a convenient method to manage ProxyServer start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the node that will act as a proxy server from the user to applicationmaster</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-mapreduce-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-yarn</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hadoop Mapreduce core libraries.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on all ResourceManager and NodeManager nodes.</p></td></tr></tbody></table></div><div 
class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-mapreduce-historyserver-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop, hadoop-mapreduce</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Hadoop MapReduce HistoryServer, which provides a convenient method to manage MapReduce HistoryServer start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the host that will be acting as the MapReduce History Server.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-doc-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Doc</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hadoop documentation package.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-client-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Library</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop, hadoop-hdfs, hadoop-yarn, 
hadoop-mapreduce</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>A set of symbolic links that gathers the libraries for programming Hadoop and submitting Hadoop jobs.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Client nodes that will be used to submit Hadoop jobs.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites:CorePackageSetup.1">Prerequisites: Core Package Setup</h3><p>Perform the following steps on all the nodes in the Hadoop cluster and its client nodes:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-utils-&lt;PHD_BIGTOP_UTILS_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/zookeeper/rpm/zookeeper-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><p>Where <code>working_dir</code> is the directory where you want the rpms expanded.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNResourceManagerSetup">YARN ResourceManager Setup</h3><p>Install the YARN ResourceManager package on the workstation that will serve as YARN ResourceManager:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-resourcemanager-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNNodeManagerSetup">YARN NodeManager Setup</h3><p>Install the YARN NodeManager package on each workstation that will serve as a YARN node:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-nodemanager-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-MapreduceHistoryServerSetup">Mapreduce HistoryServer Setup</h3><p>Install the MapReduce HistoryServer package and its dependency packages on the workstation that will serve as the MapReduce History Server:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-mapreduce-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-mapreduce-historyserver-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNProxyServerSetup">YARN ProxyServer Setup</h3><p>Install the YARN Proxy Server package and its dependency packages on the workstation that will serve as the YARN Proxy Server.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm
$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-yarn-proxyserver-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNConfiguration">YARN Configuration</h3><p>YARN configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/hadoop/conf/</code></p><p>Refer to the Apache Hadoop documentation for how to configure YARN in distributed mode.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNUsage">YARN Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingYARN">Starting YARN</h4><p>YARN includes three services: ResourceManager (RM), NodeManager (NM), MapReduce HistoryManager (MRHM). RM and NM are required; MRHM is optional.</p><p>Before you start these services, create some working directories on HDFS, as follows:</p><p><strong>Create working directories on HDFS:</strong></p><p>The commands shown here are only examples; the exact steps depend upon your own environment and directory setup.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs dfs -mkdir /tmp
$ sudo -u hdfs hdfs dfs -chmod 777 /tmp
$ sudo -u hdfs hdfs dfs -mkdir -p /var/log/gphd/hadoop-yarn
$ sudo -u hdfs hdfs dfs -chown yarn:hadoop /var/log/gphd/hadoop-yarn
$ sudo -u hdfs hdfs dfs -mkdir -p /user/history
$ sudo -u hdfs hdfs dfs -chown mapred:hadoop /user/history
$ sudo -u hdfs hdfs dfs -chmod -R 777 /user/history
$ sudo -u hdfs hdfs dfs -mkdir -p /user/hadoop
$ sudo -u hdfs hdfs dfs -chown hadoop:hadoop /user/hadoop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingResourceManager">Starting ResourceManager</h4><p>The RM daemon only needs to be started on the master node.</p><p>To start RM, run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-yarn-resourcemanager start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-yarn-resourcemanager start</pre>
</div></div><p>Once RM is started, you can visit its dashboard at: <code> http://localhost:8088/</code></p><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingNodeManager">Starting NodeManager</h4><p>The NodeManager daemon needs to be started on all hosts that will be used as working nodes.</p><p>To start NodeManager, run either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-yarn-nodemanager start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-yarn-nodemanager start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartMapReduceHistoryServer">Start MapReduce HistoryServer</h4><p>MapReduce HistoryServer only needs to be run on the server that is meant to be the history server. It is an optional service and should only be enabled if you want to keep track of the MapReduce jobs that have been run.</p><p>To start the MapReduce History Server, run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-mapreduce-historyserver start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-mapreduce-historyserver start</pre>
</div></div><p>When the MR HistoryServer is started, you can visit its dashboard at: <code>http://localhost:19888/</code></p><h4 id="ManuallyInstallingPivotalHD2.0Stack-UsingYARN">Using YARN</h4><p>After RM and NM are started, you can now submit YARN applications.</p><p>For simplicity, we assume you are running Hadoop in pseudo-distributed mode using the default pseudo configuration.</p> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>Before you start using YARN, make sure the HDFS daemons are running.</p>
</div>
</div>
<p>Here is an example MapReduce job:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-mapreduce-examples-*.jar pi 2 200</pre>
</div></div><p>This runs the PI generation example. You can track the progress of this job at the RM dashboard: <code>http://localhost:8088/</code></p><p>You can also run other MapReduce examples. For example, the following command prints a list of available examples:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hadoop jar /usr/lib/gphd/hadoop-mapreduce/hadoop-mapreduce-examples-*.jar</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingYARN">Stopping YARN</h4><p>Stop the YARN daemons manually by running the following commands.</p><p><strong>To stop the MapReduce HistoryServer Daemon:</strong></p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-mapreduce-historyserver stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-mapreduce-historyserver stop</pre>
</div></div><p><strong>To stop the NodeManager Daemon:</strong></p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-yarn-nodemanager stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-yarn-nodemanager stop</pre>
</div></div><p><strong>To stop the ResourceManager Daemon:</strong></p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hadoop-yarn-resourcemanager stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hadoop-yarn-resourcemanager stop</pre>
</div></div><h2 id="ManuallyInstallingPivotalHD2.0Stack-HadoopPseudo-distributedConfiguration">Hadoop Pseudo-distributed Configuration</h2><p>This section provides instructions for installing Hadoop Pseudo-distributed Configuration.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hadoop-conf-pseudo-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Configuration</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>hadoop-hdfs-datanode, hadoop-hdfs-secondarynamenode, hadoop-yarn-resourcemanager, hadoop-hdfs-namenode, hadoop-yarn-nodemanager, hadoop-mapreduce-historyserver,</p><p>hadoop-yarn-proxyserver</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>A set of configuration files for running Hadoop in pseudo-distributed mode on a single server.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Install on the pseudo-distributed host.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HadoopPseudo-distributedConfigurationSetup">Hadoop Pseudo-distributed Configuration Setup</h3><p>Hadoop configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/hadoop/conf/</code></p><p>Install the pseudo-distributed configuration package as follows:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hadoop/rpm/hadoop-conf-pseudo-&lt;PHD_HADOOP_VERSION&gt;-&lt;nn&gt;.x86_64.rpm</pre>
</div></div><p> </p><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Zookeeper"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Zookeeper">Zookeeper</h2><p>ZooKeeper is a high-performance coordination service for distributed applications.</p><p>This section describes how to install Zookeeper.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-ZookeeperRPMPackages">Zookeeper RPM Packages</h3><p>Pivotal HD provides the following RPMs as part of this release. The core package provides all executable, libraries, configurations, and documentation for Zookeeper and is required on every node in the Zookeeper cluster as well as the client workstation that will access the Zookeeper service. The daemon packages provide a convenient way to manage Zookeeper daemons as Linux services, which rely on the core package.</p> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>Zookeeper doesn't require Hadoop Core Packages.</p>
</div>
</div>
<div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>zookeeper-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Zookeeper core package, which provides the executables, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the ZooKeeper cluster, and the client workstations which will access the ZooKeeper service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>zookeeper-server-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>ZooKeeper Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for Zookeeper server, which provides a convenient method to manage Zookeeper server start/stop as a Linux service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>zookeeper-doc-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td 
class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Documentation</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Zookeeper documentation package.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-ZookeeperServerSetup">Zookeeper Server Setup</h3><p>Install the Zookeeper core package and the Zookeeper server daemon package on the workstation that will serve as the Zookeeper server, as follows:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/zookeeper/rpm/zookeeper-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/zookeeper/rpm/zookeeper-server-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><p>Where <code>working_dir</code> is the directory where you want the rpms expanded.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-ZookeeperClientSetup">Zookeeper Client Setup</h3><p>Install the Zookeeper core package on the client workstation to access the Zookeeper service, as follows:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/zookeeper/rpm/zookeeper-&lt;PHD_ZOOKEEPER_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-ZookeeperConfiguration">Zookeeper Configuration</h3><p>Zookeeper configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/zookeeper/conf</code></p><p>This is the default configuration for quick reference and modification.</p><p>You can make modifications to these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.1">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingtheZookeeperDaemon">Starting the Zookeeper Daemon</h4><p>After installing the daemon package for Zookeeper, the Zookeeper server daemon by default starts automatically at system startup.</p><p>To start the daemon manually, run one of the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service zookeeper-server start</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/zookeeper-server start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-AccessingtheZookeeperservice">Accessing the Zookeeper service</h4><p>To access the Zookeeper service on a client machine, run the <code>zookeeper-client</code> command directly in the shell, and then enter commands in the ZK shell that it opens:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ zookeeper-client
 In the ZK shell:
 &gt; ls /
 &gt; create /zk_test my_data
 &gt; get /zk_test
 &gt; quit</pre>
</div></div><p><br/> You can get a list of available commands by inputting "?" in the Zookeeper shell.</p><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingtheZookeeperDaemon">Stopping the Zookeeper Daemon</h4><p>Stop the Zookeeper server daemon manually by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service zookeeper-server stop</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/zookeeper-server stop</pre>
</div></div><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-HBase"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-HBase">HBase</h2><p>HBase is a scalable, distributed database that supports structured data storage for large tables.</p><p>This section specifies how to install HBase.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites">Prerequisites</h3><p>As HBase is built on top of Hadoop and Zookeeper, the Hadoop and Zookeeper core packages must be installed for HBase to operate correctly.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseRPMPackages">HBase RPM Packages</h3><p>Pivotal HD provides the following RPMs as part of this release. The core package provides all executables, libraries, configurations and documentation for HBase and is required on every node in the HBase cluster as well as on the client workstation that wants to access the HBase service. The daemon packages provide a convenient way to manage HBase daemons as Linux services, which rely on the core package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop HDFS Packages and ZooKeeper Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>HBase core package provides all executables, libraries, configuration files and documentations.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hbase-master-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td 
class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>HBase Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for HMaster, which provides a convenient method to manage HBase HMaster server start/stop as a Linux service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hbase-regionserver-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>HBase Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package for HRegionServer, which provides a convenient method to manage HBase HRegionServer start/stop as a Linux service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hbase-thrift-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (thrift service)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>HBase Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide HBase service through thrift.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> 
<strong>hbase-rest-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (Restful service)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>HBase Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide HBase service through REST.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hbase-doc-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Documentation</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>HBase documentation package.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseMasterSetup">HBase Master Setup</h3><p>Install the HBase core package and the HBase master daemon package on the workstation that will serve as the HMaster:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hbase/rpm/hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm 
$ sudo rpm -ivh working_dir/hbase/rpm/hbase-master-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseRegionServerSetup">HBase RegionServer Setup</h3><p>Install the HBase core package and the HBase regionserver daemon package on the workstation that will serve as the HRegionServer:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hbase/rpm/hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hbase/rpm/hbase-regionserver-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseClientSetup">HBase Client Setup</h3><p>Install the HBase core package on the client workstation that will access the HBase service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hbase/rpm/hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseThriftServerSetup">HBase Thrift Server Setup</h3><p><strong> [OPTIONAL]</strong></p><p>Install the HBase core package and the HBase thrift daemon package to provide HBase service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hbase/rpm/hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hbase/rpm/hbase-thrift-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-RESTServerSetup">REST Server Setup<strong> </strong></h3><p><strong>[OPTIONAL]</strong></p><p>Install the HBase core package and the HBase rest daemon package to provide HBase service through the Restful interface:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hbase/rpm/hbase-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hbase/rpm/hbase-rest-&lt;PHD_HBASE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBaseConfiguration">HBase Configuration</h3><p>HBase configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/hbase/conf/</code></p><p>This is the default configuration for quick reference and modification.</p><p>You can make modifications to these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HBasePost-InstallationConfiguration">HBase Post-Installation Configuration</h3><ol><li>Log in to one of the cluster nodes.</li><li><p>Create the <code>hbase.rootdir</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs dfs -mkdir -p /hbase</pre>
</div></div></li><li><p>Set the ownership for <code>hbase.rootdir</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs dfs -chown hbase:hadoop /hbase</pre>
</div></div></li><li><p>Add <code>hbase</code> user to the <code>hadoop</code> group if not already present, by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo usermod -a -G hadoop hbase</pre>
</div></div></li></ol><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.2">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingtheHBaseDaemon">Starting the HBase Daemon</h4><p>After installing the daemon package for HBase, the HBase server daemons by default start automatically at system startup.</p><p>Start the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-master start</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-master start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingtheHRegionServerdaemon">Starting the HRegionServer daemon</h4><p>Start the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-regionserver start</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-regionserver start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingtheHbaseThriftserverdaemon">Starting the Hbase Thrift server daemon</h4><p><strong> [OPTIONAL]</strong></p><p>Start the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-thrift start</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-thrift start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingtheHbaseRestserverdaemon">Starting the Hbase Rest server daemon<strong> </strong></h4><p><strong>[OPTIONAL]</strong></p><p>Start the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-rest start</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-rest start</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-AccessingtheHBaseservice">Accessing the HBase service</h4><p>To access the HBase service on a client machine, use the command hbase directly in the shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hbase</pre>
</div></div><p>Or you can use this command to enter the hbase console:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hbase shell</pre>
</div></div><p>In the HBase shell, run some test commands, for example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">hbase(main):003:0&gt; create 'test', 'cf'
hbase(main):003:0&gt; list 'test'
hbase(main):004:0&gt; put 'test', 'row1', 'cf:a', 'value1'
hbase(main):005:0&gt; put 'test', 'row2', 'cf:b', 'value2'
hbase(main):006:0&gt; put 'test', 'row3', 'cf:c', 'value3'
hbase(main):007:0&gt; scan 'test'
hbase(main):008:0&gt; get 'test',  'row1'
hbase(main):012:0&gt; disable 'test'
hbase(main):013:0&gt; drop 'test'
hbase(main):014:0&gt; quit</pre>
</div></div><p>Type <code>help</code> to get help for the HBase shell.</p><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingtheHBasedaemon">Stopping the HBase daemon</h4><p>Stop the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-master stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-master stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingtheHRegionServerdaemon">Stopping the HRegionServer daemon</h4><p>Stop the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-regionserver stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-regionserver stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingtheHbaseThriftserverdaemon">Stopping the Hbase Thrift server daemon</h4><p><strong>[OPTIONAL]</strong></p><p>Stop the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-thrift stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-thrift stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StoppingtheHbaseRestserverdaemon">Stopping the Hbase Rest server daemon</h4><p><strong> [OPTIONAL]</strong></p><p>Stop the daemons manually by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hbase-rest stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hbase-rest stop</pre>
</div></div><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Hive"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Hive">Hive</h2><p>Hive is a data warehouse infrastructure that provides data summarization and ad hoc querying.</p><p>This section specifies how to install Hive.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveComponents">Hive Components</h3><p>A Hive installation consists of the following components:</p><ul><li><code>hive-core</code></li><li><code>hive-metastore</code></li><li><code>hive-server</code></li><li><code>hive-server2</code></li></ul><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.1">Prerequisites</h3><p>As Hive is built on top of Hadoop, HBase and Zookeeper, the Hadoop, HBase and Zookeeper core packages must be installed for Hive to operate correctly. <br/> The following prerequisites must also be met before installing Hive:</p><ul><li>DB Server (we recommend using PostgreSQL)</li></ul><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveRPMPackages">Hive RPM Packages</h3><p>Hive consists of hive core, hive metastore daemon, hive server and hive server2 packages:</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hive-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop, HBase Core Packages</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hive core package provides the executables, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hive client &amp; server workstation</p></td></tr></tbody></table></div><div 
class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hive-server-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (hive server)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hive Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Hive service through thrift</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hive server node</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hive-metastore-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (Metastore server)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hive Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Hive metadata information through metastore server.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hive Metastore server node</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hive-server2-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (hive server2)</p></td></tr><tr><td 
class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hive Core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Hive Server2.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hive server2 node</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-InstallingDBforHiveMetastore">Installing DB for Hive Metastore</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-SetupDB(RecommendusingPostgreSQL)onthehivemetastoreNode">Set up DB (Recommend using PostgreSQL) on the<code> hive metastore</code> Node</h4><ol><li><p>Install postgresql-server on hive metastore node as root:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo yum install postgresql-server</pre>
</div></div></li><li><p>Initialize the database:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service postgresql initdb</pre>
</div></div></li><li><p>Open the <code>/var/lib/pgsql/data/postgresql.conf</code> file and set the following values:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">listen_addresses = '*'
standard_conforming_strings = off</pre>
</div></div></li><li><p>Open the <code>/var/lib/pgsql/data/pg_hba.conf</code> file and comment out all the lines starting with <code>host</code> and <code>local</code> by adding <code>#</code> to the start of each line.<br/>Then add the following lines:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">local all all trust
host all all 0.0.0.0 0.0.0.0 trust</pre>
</div></div></li><li><p>Create the <code>/etc/sysconfig/pgsql/postgresql</code> file and add the following parameter/value pair:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">PGPORT=10432</pre>
</div></div></li><li><p>Start the database:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service postgresql start</pre>
</div></div></li><li><p>Create the <code>hive</code> user and the <code>metastore</code> database:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u postgres createuser -p 10432 -D -S -R -P hive
$ sudo -u postgres createdb -p 10432 -O hive metastore </pre>
</div></div></li><li><p>Run the PostgreSQL script to create the Hive schema in the <code>metastore</code> database:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u postgres psql -p 10432 -d metastore -U hive -f /usr/lib/gphd/hive/scripts/metastore/upgrade/postgres/hive-schema-&lt;HIVE_VERSION&gt;.postgres.sql
</pre>
</div></div></li></ol><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveMetaStoreServerSetup">Hive MetaStore Server Setup</h3><p>Install the Hive core package and Hive Metastore daemon package to provide Hive metadata information through a centralized Metastore service.</p><ol><li><p>Install the Hive metastore:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo yum install postgresql-jdbc
$ sudo rpm -ivh working_dir/hive/rpm/hive-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hive/rpm/hive-metastore-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div></li><li><p>Open <code>/etc/gphd/hive/conf/hive-site.xml</code> and change it to the following:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;configuration&gt;
 &lt;property&gt;
   &lt;name&gt;javax.jdo.option.ConnectionPassword&lt;/name&gt;
   &lt;value&gt;hive&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;hive.metastore.uris&lt;/name&gt;
   &lt;value&gt;thrift://&lt;CHANGE_TO_HIVE_METASTORE_ADDRESS&gt;:9083&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;javax.jdo.option.ConnectionURL&lt;/name&gt;
   &lt;value&gt;jdbc:postgresql://&lt;CHANGE_TO_HIVE_METASTORE_ADDRESS&gt;:10432/metastore&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;hive.hwi.war.file&lt;/name&gt;
   &lt;value&gt;/usr/lib/gphd/hive/lib/hive-hwi.war&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;javax.jdo.option.ConnectionDriverName&lt;/name&gt;
   &lt;value&gt;org.postgresql.Driver&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;datanucleus.autoCreateSchema&lt;/name&gt;
   &lt;value&gt;false&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;javax.jdo.option.ConnectionUserName&lt;/name&gt;
   &lt;value&gt;hive&lt;/value&gt;
 &lt;/property&gt;
 &lt;property&gt;
   &lt;name&gt;hive.metastore.execute.setugi&lt;/name&gt;
   &lt;value&gt;true&lt;/value&gt;
 &lt;/property&gt;
 &lt;/configuration&gt; </pre>
</div></div><p><strong>Note</strong>: Replace <code>&lt;<em>CHANGE_TO_HIVE_METASTORE_ADDRESS</em>&gt;</code> in the above file with the address of the Hive metastore node.</p></li><li><p>Link the postgresql jar file:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo ln -s /usr/share/java/postgresql-jdbc.jar /usr/lib/gphd/hive/lib/postgresql-jdbc.jar</pre>
</div></div></li><li><p>Start the hive-metastore:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hive-metastore start</pre>
</div></div></li></ol><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveServerSetup">Hive Server Setup<strong> </strong></h3><p><strong>[OPTIONAL]</strong></p><p>Install the Hive core package and Hive server  package to provide Hive service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"> $ sudo rpm -ivh working_dir/hive/rpm/hive-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
 $ sudo rpm -ivh working_dir/hive/rpm/hive-server-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-SetupPostgreSQLjdbcdriver">Set up PostgreSQL jdbc driver</h4><p>Copy the <code>postgresql-jdbc.jar </code>from the<code> HIVE_METASTORE </code>node  to <code>/usr/lib/gphd/hive/lib</code> on the <code>HIVE_SERVER</code> node</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveServer2Setup">Hive Server2 Setup<strong> </strong></h3><p><strong>[OPTIONAL]</strong></p><p>Install the Hive core package and Hive server2 package to provide Hive service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hive/rpm/hive-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hive/rpm/hive-server2-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><p>Open the<code> /etc/gphd/hive/conf/hive-site.xml</code> and add the following properties:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;property&gt;
  &lt;name&gt;hive.server2.thrift.port&lt;/name&gt;
  &lt;value&gt;&lt;CHANGE_TO_HIVE_SERVER2_PORT&gt;&lt;/value&gt;
  &lt;description&gt;Port number of HiveServer2 Thrift interface.
  Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
  &lt;name&gt;hive.server2.thrift.bind.host&lt;/name&gt;
  &lt;value&gt;&lt;CHANGE_TO_HIVE_SERVER2_HOSTNAME&gt;&lt;/value&gt;
  &lt;description&gt;Bind host on which to run the HiveServer2 Thrift interface.
  Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST&lt;/description&gt;
&lt;/property&gt;</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-SetupPostgreSQLjdbcdriver.1">Set up PostgreSQL jdbc driver</h4><p>Copy the <code>postgresql-jdbc.jar</code> from the <code>HIVE_METASTORE</code> node to <code>/usr/lib/gphd/hive/lib</code> on the <code>HIVE_SERVER2</code> node.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveConfiguration">Hive Configuration</h3><p>The Hive configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/hive/conf/</code></p><p>You can make modifications to this configuration template or create your own.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HivePost-installationConfiguration">Hive Post-installation Configuration</h3><ol><li><p>Log in to one of the cluster nodes as root.</p></li><li><p>Create the <code>hive.warehouse.dir</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hadoop fs -mkdir -p /user/hive/warehouse</pre>
</div></div></li><li><p>Set permissions for the <code>hive.warehouse.dir</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hadoop fs -chmod 775 /user/hive/warehouse</pre>
</div></div></li><li><p>Set the ownership for the <code>hive.warehouse.dir</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo -u hdfs hadoop fs -chown hive:hadoop /user/hive/warehouse</pre>
</div></div></li><li><p>Add hive user to the hadoop group, if not already present, using:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo usermod -a -G hadoop hive</pre>
</div></div></li></ol><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveClientSetup">Hive Client Setup</h3><p>Hive is a Hadoop client-side library. Install the Hive core package on the client workstation:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"> $ sudo rpm -ivh working_dir/hive/rpm/hive-&lt;PHD_HIVE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-SetupPostgreSQLjdbcdriver.2">Set up PostgreSQL jdbc driver</h4><p>Copy the <code>postgresql-jdbc.jar</code> from the <code>HIVE_METASTORE</code> node to <code>/usr/lib/gphd/hive/lib</code> on the <code>HIVE_CLIENT</code> node.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HiveUsage">Hive Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopHiveMetastoreServer">Start/Stop Hive Metastore Server</h4><p>Start/stop the Hive Metastore server daemon by running:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hive-metastore start
$ sudo service hive-metastore stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hive-metastore start
$ sudo /etc/init.d/hive-metastore stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopHiveServer">Start/Stop Hive Server</h4><p><strong>[OPTIONAL]</strong></p><p>Start/stop the Hive server daemon by running:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hive-server start
$ sudo service hive-server stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hive-server start
$ sudo /etc/init.d/hive-server stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopHiveServer2">Start/Stop Hive Server2<strong> </strong></h4><p><strong>[Optional]</strong></p><p>Start/stop Hive server2 daemon by running:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hive-server2 start
$ sudo service hive-server2 stop</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hive-server2 start
$ sudo /etc/init.d/hive-server2 stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartHiveServerClient">Start HiveServer Client</h4><p>To run Hive on a client machine, use the <code>hive</code> command directly in shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hive</pre>
</div></div><p> </p><p>For example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hive -e "CREATE TABLE test(id string, name string);"
$ hive -e "show tables"
OK
test</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartHiveServer2Client">Start HiveServer2 Client</h4><p>HiveServer2 supports a new command shell <code>Beeline</code> that works with HiveServer2:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ beeline</pre>
</div></div><p> </p><p>For example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ beeline
beeline&gt; !connect jdbc:hive2://&lt;hostname&gt; &lt;username&gt; &lt;password&gt; org.apache.hive.jdbc.HiveDriver
0: jdbc:hive2://localhost&gt; create table test1(id string, name string);
0: jdbc:hive2://localhost&gt; show tables;
+-----------+
| tab_name  |
+-----------+
| test      |
| test1     |
+-----------+</pre>
</div></div><p> </p><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Hcatalog"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Hcatalog">Hcatalog</h2><p>Hcatalog is a metadata and table management system.</p><p>This section specifies how to install Hcatalog.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.2">Prerequisites</h3><p>Hcatalog is built on top of Hadoop, HBase , Hive and Zookeeper, so the Hadoop, HBase, Hive and Zookeeper core packages must be installed for Hcatalog to operate correctly.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HcatalogRPMPackages">Hcatalog RPM Packages</h3><p>Hcatalog consists of one core package, a thrift server daemon package that provides catalog service, and a web server daemon package that provides catalog service through http.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hcatalog-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop, HBase and Hive Core Packages.</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hcatalog core package provides the executables, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hcatalog Client workstation.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hcatalog-server-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (hcatalog 
server).</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hcatalog Core Package.</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Hive service through thrift.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Hcatalog server node.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>webhcat-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Libraries.</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hcatalog Core Package.</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Webhcat package to provide Webhcat libraries.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Webhcat server node.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>webhcat-server-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (webhcat server).</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hcatalog and Webhcat Core Package.</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Webhcat Server.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on 
Nodes</strong></p></td><td class="confluenceTd"><p>Webhcat server node.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HcatalogClientSetup">Hcatalog Client Setup</h3><p>Hcatalog is a Hadoop client-side library. Install the Hcatalog core package on the client workstation.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hive/rpm/hcatalog-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HcatalogServerSetup">Hcatalog Server Setup</h3><p><strong>[OPTIONAL]</strong></p><p>Install the Hcatalog core package and Hcatalog thrift daemon package to provide Hcatalog service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hcatalog/rpm/hcatalog-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hcatalog/rpm/hcatalog-server-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-WebhcatSetup">Webhcat Setup</h3><p><strong>[OPTIONAL]</strong></p><p>Install the Hcatalog core package and Webhcat package to provide Webhcat libraries:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hcatalog/rpm/hcatalog-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/hcatalog/rpm/webhcat-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-WebhcatServerSetup">Webhcat Server Setup</h3><p><strong>[OPTIONAL]</strong></p><p>Install the Hcatalog core package, Webhcat core package and Webhcat daemon package to provide Webhcat service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"> $ sudo rpm -ivh working_dir/hcatalog/rpm/hcatalog-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm
 $ sudo rpm -ivh working_dir/hcatalog/rpm/webhcat-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm
 $ sudo rpm -ivh working_dir/hcatalog/rpm/webhcat-server-&lt;PHD_HCATALOG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-HcatalogConfiguration">Hcatalog Configuration</h3><p>The Hcatalog configuration files are located in the following directories:</p><p style="margin-left: 30.0px;"><code> /etc/gphd/hive/conf/ </code></p><p style="margin-left: 30.0px;"><code>/etc/gphd/hcatalog/conf/</code></p><p>You can make modifications to these configuration templates or create your own.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.3">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopHcatalogServer">Start/Stop Hcatalog Server<strong> </strong></h4><p>Start/stop Hcatalog server daemon by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service hcatalog-server start
$ sudo service hcatalog-server stop</pre>
</div></div><p>or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/hcatalog-server start
$ sudo /etc/init.d/hcatalog-server stop</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>The hcatalog-server and hive-metastore services cannot run on the same node at the same time. These two services should be placed on different nodes.</p>
</div>
</div>
<h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopWebhcatServer">Start/Stop Webhcat Server</h4><p>Start/stop Webhcat server daemon by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service webhcat-server start
$ sudo service webhcat-server stop</pre>
</div></div><p>or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/webhcat-server start 
$ sudo /etc/init.d/webhcat-server stop</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-Webhcatexample">Webhcat example</h4><p>In this example, we create a table with Hive and then retrieve its definition through webhcat:</p><p> </p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"># Create a table using hive:
$ hive -e "CREATE TABLE test(id string, name string);" 
# Get the table using webhcat; change hostname and username to the appropriate values
$ curl -s 'http://&lt;hostname&gt;:50111/templeton/v1/ddl/database/default/table/test?user.name=username'
{"columns":[{"name":"id","type":"string"},{"name":"name","type":"string"}],"database":"default","table":"test"}</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartHcatalogClient">Start Hcatalog Client</h4><p>To run Hcatalog on a client machine, use the <code>hcat</code> command directly in the shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ hcat
$ cat test_data
key,value1
key,value2
key,value3
key,value4
$ hcat -e "CREATE TABLE hcat_test(key string, value string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
$ hive -e "LOAD DATA LOCAL INPATH 'test_data' OVERWRITE INTO TABLE hcat_test"
$ hive -e "SELECT COUNT(*) FROM hcat_test"
4</pre>
</div></div><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Pig"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Pig">Pig</h2><p>Pig is a high-level data-flow language and execution framework for parallel computation.</p><p>This section specifies how to install Pig.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.3">Prerequisites</h3><p>As Pig is built on top of Hadoop the Hadoop package must be installed to run Pig correctly.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-PigRPMPackages">Pig RPM Packages</h3><p>Pig has only one core package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>pig-&lt;PHD_PIG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop Core Packages</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Pig core package provides executable, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Pig client workstation</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>pig-doc-&lt;PHD_PIG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Documentation</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>N/A</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Pig documentation package.</p></td></tr></tbody></table></div><h3 
id="ManuallyInstallingPivotalHD2.0Stack-PigClientSetup">Pig Client Setup</h3><p>Pig is a Hadoop client-side library. Install the Pig package on the client workstation:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/pig/rpm/pig-&lt;PHD_PIG_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-PigConfiguration">Pig Configuration</h3><p>Pig configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/pig/conf/</code></p><p>These are the default configuration templates for quick reference and modification.</p><p>You can modify these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.4">Usage</h3><p>To run Pig scripts on a client machine, use the <code>pig</code> command directly in the shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ pig</pre>
</div></div><p>Check the pig command usage by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ pig -help</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-PiggybankUsage">Piggybank Usage</h3><p>Piggybank is a java library which includes a lot of useful Pig UDFs. Piggybank provides UDFs for different Pig storage functions, math functions, string functions and datetime functions, etc.</p><p>After you have installed the Pig rpm package, the piggybank library is also installed on the host.</p><p>The piggybank jar is in the following location:</p><p style="margin-left: 30.0px;"><code>/usr/lib/gphd/pig/piggybank.jar</code>.</p><p>The library jars that piggybank depends on are in the following location:</p><p style="margin-left: 30.0px;"><code>/usr/lib/gphd/pig/lib/</code></p><p>Use the following script to register the piggybank library in your pig script:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">register /usr/lib/gphd/pig/lib/avro-*.jar
register /usr/lib/gphd/pig/lib/commons-*.jar
register /usr/lib/gphd/pig/lib/groovy-all-*.jar
register /usr/lib/gphd/pig/lib/guava-*.jar
register /usr/lib/gphd/pig/lib/jackson-*.jar
register /usr/lib/gphd/pig/lib/joda-time-*.jar
register /usr/lib/gphd/pig/lib/json-simple-*.jar
register /usr/lib/gphd/pig/lib/parquet-pig-bundle-*.jar
register /usr/lib/gphd/pig/lib/protobuf-java-*.jar
register /usr/lib/gphd/pig/lib/snappy-java-*.jar
register /usr/lib/gphd/pig/piggybank.jar</pre>
</div></div><p>Here are some notes for using Hive storage (such as HiveColumnarStorage) in piggybank.</p><ul><li>PHD hive must be installed. Please refer to the <a href="#ManuallyInstallingPivotalHD2.0Stack-Hive">Hive section</a> for hive installation.</li><li>You can register piggybank dependency jars as needed in your pig script with the above code.</li><li><p>Additionally, use the following pig code to register hive jars in your script</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">register /usr/lib/gphd/hive/hive-exec-*.jar
register /usr/lib/gphd/hive/hive-common-*.jar</pre>
</div></div></li></ul><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Mahout"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Mahout">Mahout</h2><p>Mahout is a scalable machine learning and data mining library.</p><p>This section specifies how to install Mahout.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.4">Prerequisites</h3><p>Mahout is built on top of Hadoop, so the Hadoop package must be installed to get Mahout running.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-MahoutRPMPackages">Mahout RPM Packages</h3><p>Mahout has only one core package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>mahout-&lt;PHD_MAHOUT_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop Core Packages</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Mahout core package provides executable, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Mahout client workstation</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-MahoutClientSetup">Mahout Client Setup</h3><p>Mahout is a Hadoop client-side library. Install the Mahout package on the client workstation:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/mahout/rpm/mahout-&lt;PHD_MAHOUT_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-MahoutConfiguration">Mahout Configuration</h3><p>Mahout configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/mahout/conf/</code></p><p>This contains the default configuration templates for quick reference and modification.</p><p>You can modify these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.5">Usage</h3><p>To run Mahout scripts on a client machine, use the command mahout directly in the shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ mahout PROGRAM</pre>
</div></div><p><br/> Check the full list of mahout programs by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ mahout</pre>
</div></div><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Flume"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Flume">Flume</h2><p>Flume is a distributed, reliable, and available service for efficiently collecting, aggregating, and moving large amounts of log data. It has a simple and flexible architecture based on streaming data flows. It is robust and fault tolerant with tunable reliability mechanisms and many failover and recovery mechanisms. It uses a simple extensible data model that allows for online analytic application. For more information, refer to the Apache Flume page: <a class="external-link" href="http://flume.apache.org/" rel="nofollow"> http://flume.apache.org/</a></p><p>This section specifies how to install Flume.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.5">Prerequisites</h3><p>As Flume is built on top of Hadoop, the Hadoop package must be installed to get Flume running correctly. <br/> (Hadoop core and hadoop hdfs should be installed)</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-FlumeRPMPackages">Flume RPM Packages</h3><p>Flume consists of one core package and a flume-agent server daemon package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>flume-&lt;PHD_FLUME_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop Core Packages</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Flume core package provides executable, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Flume client 
workstation.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>flume-agent-&lt;PHD_FLUME_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (Flume Agent server)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Flume core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide Flume service for generating, processing, and delivering data.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Flume agent server node.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-FlumeClientSetup">Flume Client Setup</h3><p>Flume is a Hadoop client-side library. Install the Flume package on the client workstation: </p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/flume/rpm/flume-&lt;PHD_FLUME_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>User <code>flume</code> and group <code>flume</code> should be created with correct configuration, including <code>uid</code>, <code>gid</code>, <code>home_dir</code> and <code>shell</code>.</p><p>Check in the following paths: <code>/etc/passwd</code>,<code> /etc/group</code></p>
</div>
</div>
<h3 id="ManuallyInstallingPivotalHD2.0Stack-FlumeAgentSetup">Flume Agent Setup</h3><p><strong>[OPTIONAL]</strong></p><p>Install the Flume core package and Flume agent daemon package to provide Flume service for generating, processing, and delivering data:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/flume/rpm/flume-&lt;PHD_FLUME_VERSION&gt;-&lt;nn&gt;.noarch.rpm 
$ sudo rpm -ivh working_dir/flume/rpm/flume-agent-&lt;PHD_FLUME_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-FlumeAgentConfiguration">Flume Agent Configuration</h3><p>Flume configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code> /etc/gphd/flume/conf/</code></p><p>This contains the default configuration templates for quick reference and modification.</p><p>You can modify these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.6">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingFlumeClient">Starting Flume Client</h4><p>Simple configuration example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ cat example.conf 
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1</pre>
</div></div><p> </p><p>Run Flume scripts on a client machine:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"># terminal 1:

$ flume-ng agent --conf conf --conf-file example.conf --name a1 -Dflume.root.logger=INFO,console
14/03/19 06:00:42 INFO source.NetcatSource: Created serverSocket:sun.nio.ch.ServerSocketChannelImpl[/127.0.0.1:44444]
14/03/19 06:01:18 INFO sink.LoggerSink: Event: { headers:{} body: 68 65 6C 6C 6F 0D                               hello. }

# terminal 2:

$ telnet localhost 44444
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
hello
OK
</pre>
</div></div><p> </p><h4 id="ManuallyInstallingPivotalHD2.0Stack-Starting/StoppingFlumeAgentServer">Starting/Stopping Flume Agent Server</h4><p>Flume agent configuration example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ cat /etc/gphd/flume/conf/flume.conf 
agent.sources = r1
agent.sinks = k1
agent.channels = c1

# Describe/configure the source
agent.sources.r1.type = netcat
agent.sources.r1.bind = localhost
agent.sources.r1.port = 44444

# Describe the sink
agent.sinks.k1.type = hdfs
agent.sinks.k1.hdfs.path = hdfs://localhost/user/flume/
agent.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1</pre>
</div></div><p> </p><p>Start/stop the Flume agent server daemon by running the following commands:</p><p>Run:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service flume-agent start
$ sudo service flume-agent stop
$ sudo service flume-agent status</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/flume-agent start
$ sudo /etc/init.d/flume-agent stop
$ sudo /etc/init.d/flume-agent status</pre>
</div></div><p><span class="confluence-anchor-link" id="ManuallyInstallingPivotalHD2.0Stack-Sqoop"></span></p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Sqoop">Sqoop</h2><p>Sqoop is a tool designed for efficiently transferring bulk data between <a class="external-link" href="http://hadoop.apache.org/" rel="nofollow">Apache Hadoop</a> and structured datastores such as relational databases. For more details, refer to the Apache Sqoop page: <a class="external-link" href="http://sqoop.apache.org/" rel="nofollow"> http://sqoop.apache.org/</a></p><p>This section specifies how to install Sqoop.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.6">Prerequisites</h3><p>As Sqoop is built on top of Hadoop and HBase, the Hadoop and HBase packages must be installed to get Sqoop running correctly.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-SqoopRPMPackages">Sqoop RPM Packages</h3><p>Sqoop consists of one core package and a sqoop-metastore server daemon package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>sqoop-&lt;PHD_SQOOP_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hadoop, HBase Core Packages</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Sqoop core package provides executable, libraries, configuration files and documentation.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Sqoop 
client workstation</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>sqoop-metastore-&lt;PHD_SQOOP_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (Sqoop Metastore server)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Sqoop core Package</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon scripts package to provide shared metadata repository for Sqoop.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Sqoop metastore server node</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-SqoopClientSetup">Sqoop Client Setup</h3><p>Sqoop is a Hadoop client-side library. Install the Sqoop package on the client workstation:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/sqoop/rpm/sqoop-&lt;PHD_SQOOP_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-SqoopMetastoreSetup">Sqoop Metastore Setup</h3><p><strong>[OPTIONAL]</strong></p><p>Install the Sqoop core package and Sqoop metastore package to provide a shared metadata repository for Sqoop. The sqoop-metastore package depends on the Sqoop core package:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/sqoop/rpm/sqoop-&lt;PHD_SQOOP_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/sqoop/rpm/sqoop-metastore-&lt;PHD_SQOOP_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-SqoopMetastoreConfiguration">Sqoop  Metastore Configuration</h3><p>The Sqoop metastore configuration files are in the following location:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/sqoop/conf/</code></p><p>These are the default configuration templates for quick reference and modification.</p><p>You can modify these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.7">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-Starting/StoppingSqoopMetastoreServer">Starting/Stopping Sqoop Metastore Server<strong> <br/> </strong></h4><p>Start/stop Sqoop metastore server daemon by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo service sqoop-metastore start
$ sudo service sqoop-metastore stop
$ sudo service sqoop-metastore status</pre>
</div></div><p>Or</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/sqoop-metastore start
$ sudo /etc/init.d/sqoop-metastore stop
$ sudo /etc/init.d/sqoop-metastore status</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-StartingSqoopClient">Starting Sqoop Client</h4><p>To run Sqoop scripts on a client machine, use the command sqoop directly in the shell:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sqoop</pre>
</div></div><p>Check the sqoop command usage by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sqoop help</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-SqoopClientExample">Sqoop Client Example</h4><p>In this example, you are using sqoop to import a MySQL database table into HDFS.</p><p>To run this example, in addition to a correctly installed and configured PHD, you also need to:</p><ol><li>Install and run a MySQL instance correctly (refer to MySQL's official documentation at <a class="external-link" href="http://dev.mysql.com/doc/index-topic.html" rel="nofollow">http://dev.mysql.com/doc/index-topic.html</a>).</li><li>Install the official MySQL JDBC driver and copy <code>mysql-connector-java.jar</code> into <code>/usr/lib/gphd/sqoop/lib</code>.</li><li>Create MySQL database test and MySQL table student.</li></ol><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ mysql
mysql&gt; use test;
mysql&gt; CREATE TABLE student (id INT PRIMARY KEY, name VARCHAR(100));
mysql&gt; insert into student (id, name) values (1, "John");
mysql&gt; insert into student (id, name) values (2, "Mike");
mysql&gt; insert into student (id, name) values (3, "Tom");
mysql&gt; exit</pre>
</div></div><p>Then run sqoop to import the table to HDFS:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">$ sqoop import --connect jdbc:mysql://&lt;mysql server host&gt;/test --table student --username &lt;username&gt; --target-dir hdfs://&lt;namenode host&gt;/tmp/sqoop_output</pre>
</div></div><p>Where:</p><p style="margin-left: 30.0px;"> <code>&lt;mysql server host&gt;</code> is the host name on which your MySQL instance is running.</p><p style="margin-left: 30.0px;"><code>&lt;username&gt;</code> is the user name of the user running this command.</p><p style="margin-left: 30.0px;"> <code>&lt;namenode host&gt;</code> is the host name on which your name node is running.</p><p>Ensure the path you specified with the <code>--target-dir</code> option in the above command is a valid path on HDFS and the user account you are using has write permission on that location. </p><p>This should start a map/reduce job. Once it has finished, you should find files under the HDFS path you specified with <code>--target-dir</code>, and you should find the file content is the data of your MySQL table.</p><h2 id="ManuallyInstallingPivotalHD2.0Stack-Oozie">Oozie</h2><p>Oozie is a workflow scheduler system to manage Apache Hadoop jobs.</p><p>This section specifies how to install Oozie.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.7">Prerequisites</h3><p>Oozie is built on top of Hadoop, so Hadoop packages must be installed to get Oozie running. See the Hadoop section for Hadoop installation instructions. Oozie can manipulate Hive jobs and Pig jobs in the workflow, so if you want to use Hive jobs or Pig jobs in your workflow, Hive and Pig packages must be installed. See the Hive section and Pig section for their installation instructions.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-OozieRPMPackages">Oozie RPM Packages</h3><p>Oozie contains an oozie-client rpm package and an oozie package. 
The Oozie package depends on the oozie-client package.</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>oozie-client-&lt;PHD_OOZIE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Client and Core</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>bigtop-utils, hadoop-client</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Oozie client package provides the oozie library and client binary to connect to Oozie service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Oozie service node and Oozie client node </p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>oozie-&lt;PHD_OOZIE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Daemon (Oozie server)</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>bigtop-tomcat, hadoop-client, oozie-client</p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Daemon package to provide Oozie service.</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Oozie service node</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-OozieclientSetup">Oozie client Setup</h3><p>Install the oozie-client package on the client host that submits workflows to Oozie service.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-utils-&lt;PHD_BIGTOP_UTILS_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/oozie/rpm/oozie-client-&lt;PHD_OOZIE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>User "<code>oozie</code>" and group "<code>oozie</code>" are created with correct configuration (uid oozie, gid oozie). It is a non-login user.</p>
</div>
</div>
<h3 id="ManuallyInstallingPivotalHD2.0Stack-OozieserverSetup[Optional]">Oozie server Setup [Optional]</h3><p>Install the oozie-client package and oozie package to provide Oozie service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/utility/rpm/bigtop-utils-&lt;PHD_BIGTOP_UTILS_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/utility/rpm/bigtop-tomcat-&lt;PHD_BIGTOP_TOMCAT_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/oozie/rpm/oozie-client-&lt;PHD_OOZIE_VERSION&gt;-&lt;nn&gt;.noarch.rpm
$ sudo rpm -ivh working_dir/oozie/rpm/oozie-&lt;PHD_OOZIE_VERSION&gt;-&lt;nn&gt;.noarch.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-OozieConfiguration">Oozie Configuration</h3><p>Oozie configuration files are located in the following directory:</p><p style="margin-left: 30.0px;"><code>/etc/gphd/oozie/conf/</code></p><p>This contains the default configuration templates for quick reference and modification.</p><p>You can modify these configuration templates or create your own configuration set.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Oozieenvironmentconfiguration">Oozie environment configuration</h3><p>You can override the Oozie environment settings by exporting variables in <code>/etc/gphd/oozie/conf/oozie-env.sh</code>.</p><p>For example, if you want to define the oozie data directory, export <code>OOZIE_DATA</code> in <code>/etc/gphd/oozie/conf/oozie-env.sh</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">export OOZIE_DATA=&lt;YOUR_PATH&gt;</pre>
</div></div><p>Make sure that the owner and user group of <code>&lt;YOUR_PATH&gt;</code> is <code>oozie:oozie</code>.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-SetupDatabase">Setup Database</h3><p><strong>[OPTIONAL]<br/></strong></p><p>By default, Oozie is configured to use Embedded Derby; however, Oozie also works with HSQL, Derby, MySQL, Oracle and PostgreSQL databases.</p><p>Oozie bundles the JDBC drivers for HSQL, Embedded Derby and PostgreSQL.</p><p>HSQL is normally used for test cases as it is an in-memory database and all data is lost every time Oozie is stopped.</p><p>If you are using MySQL, Oracle or PostgreSQL, the Oozie database schema must be created. By default, Oozie creates its tables automatically.</p><p>The <code>bin/addtowar.sh</code> and the <code>oozie-setup.sh</code> scripts have an option <code>-jars</code> that can be used to add the Oracle or MySQL JDBC driver JARs to the Oozie WAR file.</p><p>The SQL database used by Oozie is configured using the following configuration properties (default values shown):</p><div class="section"><pre>  oozie.db.schema.name=oozie
  oozie.service.JPAService.create.db.schema=true
  oozie.service.JPAService.validate.db.connection=false
  oozie.service.JPAService.jdbc.driver=org.apache.derby.jdbc.EmbeddedDriver
  oozie.service.JPAService.jdbc.url=jdbc:derby:${oozie.data.dir}/${oozie.db.schema.name}-db;create=true
  oozie.service.JPAService.jdbc.username=sa
  oozie.service.JPAService.jdbc.password=
  oozie.service.JPAService.pool.max.active.conn=10
</pre><p>These values should be changed to match the database you are using.</p> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<div class="message-content"><ul><li>If the <code>oozie.db.schema.create</code> property is set to <code>true</code> (default), the Oozie tables are created automatically if they are not found in the database at Oozie start-up time. In a production system this option should be set to <code>false</code> once the database tables have been created.</li><li>If the <code>oozie.db.schema.create</code> property is set to <code>true</code>, the <code>oozie.service.JPAService.validate.db.connection</code> property value is ignored and Oozie handles it as set to <code>false</code>.</li></ul></div>
</div>
</div>
<p> </p></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.8">Usage</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-OozieClient">Oozie Client</h4><p>To run Oozie scripts on a client machine, use the command <code>oozie</code> with the sub-command directly in shell. Each sub-command may have different arguments.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie [sub-command]</pre>
</div></div><p>Check the oozie command usage by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie help</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-InitializeOozieserver">Initialize Oozie server</h4><p><strong>[OPTIONAL]</strong></p><p>Before starting the Oozie service, follow the steps below to initialize the Oozie server.</p><ol><li><p>Add the following configuration to the Hadoop configuration file <code>core-site.xml</code>, then restart HDFS and YARN:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: html/xml; gutter: false" style="font-size:12px;">&lt;property&gt;
  &lt;name&gt;hadoop.proxyuser.oozie.hosts&lt;/name&gt;
  &lt;value&gt;*&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
  &lt;name&gt;hadoop.proxyuser.oozie.groups&lt;/name&gt;
  &lt;value&gt;*&lt;/value&gt;
&lt;/property&gt;</pre>
</div></div></li><li><p><code>mkdir</code> for user <code>oozie</code> on HDFS:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo -u hdfs hdfs dfs -mkdir -p /user/oozie
$ sudo -u hdfs hdfs dfs -chown oozie /user/oozie</pre>
</div></div></li><li><p>Create the oozie database:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo service oozie init</pre>
</div></div></li><li><p>Download <code>extjs-2.2</code> from here <a class="external-link" href="http://extjs.com/deploy/ext-2.2.zip" rel="nofollow" style="text-decoration: underline;">http://extjs.com/deploy/ext-2.2.zip</a>. Put the zip file in a new directory named <code>/tmp/oozie-libext</code>.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ wget http://extjs.com/deploy/ext-2.2.zip
$ mkdir -p /tmp/oozie-libext
$ mv ext-2.2.zip /tmp/oozie-libext</pre>
</div></div></li><li><p>Setup the oozie tomcat <code>war</code> file:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo -u oozie oozie-setup prepare-war -d /tmp/oozie-libext/</pre>
</div></div></li><li><p>Setup sharelib for oozie service. Replace <code>namenode-host</code> with your name node hostname, and replace <code>namenode-port</code> with your name node port:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo -u oozie oozie-setup sharelib \
create -fs hdfs://&lt;namenode-host&gt;:&lt;namenode-port&gt; \
-locallib /usr/lib/gphd/oozie/oozie-sharelib.tar.gz</pre>
</div></div></li></ol><h4 id="ManuallyInstallingPivotalHD2.0Stack-Start/StopOozieServer[Optional]">Start/Stop Oozie Server [Optional]</h4><p>Start/stop Oozie server by running the following commands:</p><p>Either:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo service oozie start
$ sudo service oozie stop
$ sudo service oozie status</pre>
</div></div><p>Or:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo /etc/init.d/oozie start
$ sudo /etc/init.d/oozie stop
$ sudo /etc/init.d/oozie status</pre>
</div></div><h4 id="ManuallyInstallingPivotalHD2.0Stack-SubmitOozieexampleworkflows">Submit Oozie example workflows</h4><ol><li><p>Expand the examples:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ mkdir /tmp/oozie-example
$ cd /tmp/oozie-example
$ tar xzf /usr/lib/gphd/oozie/oozie-examples.tar.gz</pre>
</div></div></li><li><p>Change the job properties in the examples.<br/>Change the following files:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">/tmp/oozie-example/examples/apps/map-reduce/job.properties
/tmp/oozie-example/examples/apps/hive/job.properties
/tmp/oozie-example/examples/apps/pig/job.properties</pre>
</div></div><p>In each file, set the following properties:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">nameNode=hdfs://&lt;namenode-host&gt;:&lt;namenode-port&gt;
jobTracker=&lt;resource-manager-host&gt;:&lt;resource-manager-port&gt;</pre>
</div></div><p>Use the exact hostname and service port in your cluster. </p><p> </p></li><li>Edit the Oozie <code>workflow.xml</code> as follows:<br/><p>The Oozie <code>workflow.xml</code> is in the following directory:</p><p><code>/tmp/oozie-example/examples/apps/hive</code></p><p>Add the NameNode variable as a prefix to all paths, for example:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;param&gt;INPUT=${nameNode}/user/${wf:user()}/${examplesRoot}/input-data/table&lt;/param&gt;
&lt;param&gt;OUTPUT=${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive&lt;/param&gt;</pre>
</div></div><div><div class="syntaxhighlighter nogutter java"><p> </p></div></div><p>Also make sure to reference the <code>hive-oozie-site.xml</code> using the <code>job-xml</code> tag in the workflow. The <code>&lt;job-xml&gt;</code> element needs to be put inside the <code>&lt;hive&gt;</code> element between the <code>&lt;prepare&gt;</code> and <code>&lt;configuration&gt;</code> elements in the <code>examples/apps/hive/workflow.xml</code> file, as shown below:</p><div><div class="syntaxhighlighter nogutter java"><p> </p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf"&gt;
    &lt;start to="hive-node"/&gt;
    &lt;action name="hive-node"&gt;
        &lt;hive xmlns="uri:oozie:hive-action:0.2"&gt;
            &lt;job-tracker&gt;${jobTracker}&lt;/job-tracker&gt;
            &lt;name-node&gt;${nameNode}&lt;/name-node&gt;
            &lt;prepare&gt;
                &lt;delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive"/&gt;
                &lt;mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/&gt;
            &lt;/prepare&gt;
            &lt;job-xml&gt;${nameNode}/user/oozie/hive-oozie-site.xml&lt;/job-xml&gt;
            &lt;configuration&gt;
                &lt;property&gt;
                    &lt;name&gt;mapred.job.queue.name&lt;/name&gt;
                    &lt;value&gt;${queueName}&lt;/value&gt;
                &lt;/property&gt;
            &lt;/configuration&gt;
            &lt;script&gt;script.q&lt;/script&gt;
            &lt;param&gt;INPUT=${nameNode}/user/${wf:user()}/${examplesRoot}/input-data/table&lt;/param&gt;
            &lt;param&gt;OUTPUT=${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive&lt;/param&gt;
        &lt;/hive&gt;
        &lt;ok to="end"/&gt;
        &lt;error to="fail"/&gt;
    &lt;/action&gt;
    &lt;kill name="fail"&gt;
        &lt;message&gt;Hive failed, error message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/message&gt;
    &lt;/kill&gt;
    &lt;end name="end"/&gt;
&lt;/workflow-app&gt;</pre>
</div></div></div></div></li><li><p>Put example code onto HDFS:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ hdfs dfs -put examples /user/&lt;username&gt;</pre>
</div></div><p>Where <code>&lt;username&gt;</code> is the name of user who issues this command.</p><p> </p></li><li>Submit a map reduce example workflow<ol><li><p>Submit workflow:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -config examples/apps/map-reduce/job.properties  -run
job: &lt;oozie-job-id&gt;</pre>
</div></div></li><li><p>Check workflow status. <br/>Where <code>&lt;oozie-job-id&gt;</code> is the same id in the output of the last command.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -info &lt;oozie-job-id&gt;</pre>
</div></div></li></ol></li><li>Oozie Setup for Hive:<br/> <br/><ol><li><p>Remote Metastore Mode (recommended):<br/> <br/>Put the Hive jars into the Tomcat class loader path. Make the following change in the<code> /var/lib/gphd/oozie/tomcat-deployment/conf/catalina.properties</code> file:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">common.loader=/var/lib/gphd/oozie/*.jar,/usr/lib/gphd/hadoop/client/*.jar,/usr/lib/gphd/hive/lib/*.jar,/usr/lib/gphd/oozie/libtools/*.jar,/usr/lib/gphd/oozie/oozie-core/*.jar,${catalina.home}/lib,${catalina.home}/lib/*.jar</pre>
</div></div></li><li><p>Local Metastore Mode:<br/> <br/> Upload the JDBC driver to Oozie sharelib <br/> <br/>To enable the local metastore mode, comment out the <code>hive.metastore.uris</code> property and verify that Hive still works properly at the command-line. In local metastore mode, Oozie hive actions do not connect to the Hive Metastore, but instead talk to the database directly. In this setup, the appropriate JDBC driver (for example, for Postgres) needs to be made available to hive jobs running within Oozie:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"> hdfs dfs -put /usr/lib/gphd/hive/lib/postgresql-jdbc.jar /user/oozie/share/lib/hive</pre>
</div></div></li></ol></li><li>Submit the Hive example workflow.<br/> <br/><ol><li><p>Upload the Hive configuration file onto HDFS</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo -u oozie hdfs dfs -put /etc/gphd/hive/conf/hive-site.xml /user/oozie/hive-oozie-site.xml</pre>
</div></div> <div class="aui-message warning shadowed information-macro">
<span class="aui-icon icon-warning">Icon</span>
<div class="message-content">
<p>When uploading a Hive configuration file to HDFS, do not use<code> hive-site.xml</code> as the file name. This is because Hive action in Oozie overwrites the <code>hive-site.xml</code> file.</p><p>In the Oozie workflow file, use <code>&lt;job-xml&gt;${nameNode}/user/oozie/hive-oozie-site.xml&lt;/job-xml&gt;</code> to refer to the Hive configuration file.</p>
</div>
</div>
</li><li><p>Submit workflow:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -config examples/apps/hive/job.properties  -run
job: &lt;oozie-job-id&gt;</pre>
</div></div></li><li><p>Check workflow status.</p><p>Where <code>&lt;oozie-job-id&gt;</code> is the same id in the output of the last command.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -info &lt;oozie-job-id&gt;</pre>
</div></div></li></ol></li><li>Submit a Pig example workflow.<ol><li><p>Submit workflow:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -config examples/apps/pig/job.properties  -run
job: &lt;oozie-job-id&gt;</pre>
</div></div></li><li><p>Check the workflow status.</p><p>Where <code>&lt;oozie-job-id&gt;</code> is the same id in the output of the last command.</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ oozie job -oozie http://localhost:11000/oozie -info &lt;oozie-job-id&gt;</pre>
</div></div></li></ol></li></ol><h3 id="ManuallyInstallingPivotalHD2.0Stack-OozieinHAMode-BestPractices">Oozie in HA Mode - Best Practices</h3><ul><li>Ensure that HA is configured correctly and identically on all nodes, including client nodes. Specifically, ensure that the following variables are set appropriately in <code>hdfs-site.xml</code>:</li></ul><p style="margin-left: 60.0px;"><code> dfs.nameservices</code></p><p style="margin-left: 60.0px;"><code>dfs.ha.namenodes.nameservice ID  </code></p><p style="margin-left: 60.0px;"><code>dfs.namenode.rpc-address.nameservice ID.name node ID </code></p><p style="margin-left: 60.0px;"><code>dfs.namenode.http-address.nameservice ID.name node ID </code></p><p style="margin-left: 60.0px;"><code>dfs.namenode.shared.edits.dir </code></p><p style="margin-left: 60.0px;"><code>dfs.client.failover.proxy.provider.nameservice ID </code></p><p style="margin-left: 60.0px;"><code>dfs.ha.fencing.methods </code></p><p style="margin-left: 30.0px;">And in <code>core-site.xml</code>:</p><p style="margin-left: 60.0px;"><code> fs.defaultFS</code></p><ul><li><p>Use the namenode HA service in <code>mapreduce.job.hdfs-servers</code> in <code>yarn-site.xml</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;property&gt;
    &lt;name&gt;mapreduce.job.hdfs-servers&lt;/name&gt;
    &lt;value&gt;hdfs://test&lt;/value&gt;
&lt;/property&gt;</pre>
</div></div></li></ul><ul><li>While using Namenode HA, create all tables using the HA service as the HDFS location:</li></ul><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">CREATE EXTERNAL TABLE test (a INT) STORED AS TEXTFILE LOCATION 'hdfs://test/user/myuser/examples/input-data/table/';</pre>
</div></div><div><div class="syntaxhighlighter nogutter java"><p> </p></div></div><ul><li>Verify that all tables in Hive are created using the HA service as the HDFS location (note the location in the example below refers to <code> hdfs://test/ </code>, which is the HA service.)</li></ul><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">hive&gt; describe extended mytable;
OK
a                       int                     None
  
Detailed
Table Information      Table(tableName:mytable, dbName:default, 
owner:gpadmin@PIVOTAL, createTime:1391839636, lastAccessTime:0, 
retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:int, comment:null)], location:hdfs://test/user/gpadmin/examples/input-data/mytable,
inputFormat:org.apache.hadoop.mapred.TextInputFormat, 
outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat,
compressed:false,
numBuckets:-1, serdeInfo:SerDeInfo(name:null, 
serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, 
parameters:{serialization.format=1}),
bucketCols:[], sortCols:[], parameters:{}, 
skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], 
skewedColValueLocationMaps:{}), storedAsSubDirectories:false),
partitionKeys:[], parameters:{EXTERNAL=TRUE, 
transient_lastDdlTime=1391839636}, viewOriginalText:null, 
viewExpandedText:null, tableType:EXTERNAL_TABLE)</pre>
</div></div><div><div class="syntaxhighlighter nogutter java"><p> </p></div></div><h2 id="ManuallyInstallingPivotalHD2.0Stack-Hamster">Hamster</h2><p>Hamster is a framework that enables users running MPI programs on Apache Hadoop YARN platform. </p><p>This section specifies how to install, configure, and use Hamster.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.8">Prerequisites</h3><p>Hamster is dependent upon Hadoop (Apache Hadoop 2.2.0 / PHD 2.0), Hamster itself contains three parts, hamster-core (client and application master library), hamster-rte (plugins for OpenMPI), OpenMPI.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HamsterRPMPackages">Hamster RPM Packages</h3><p>Hamster contains hamster-core package, hamster-rte package and openmpi package. </p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hamster-core-&lt;PHD_HAMSTER-CORE_VERSION&gt;-1.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Client and application master library</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p> </p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hamster-core installation package</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the YARN cluster and the client workstation that will access the YARN service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>hamster-rte-&lt;PHD_HAMSTER-RTE_VERSION&gt;-1.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>Plugins for OpenMPI</p></td></tr><tr><td 
class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p> </p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>Hamster runtime environment installation package</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the YARN cluster and the client workstation that will access the YARN service.</p></td></tr></tbody></table></div><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>openmpi-&lt;PHD_OPENMPI_VERSION&gt;-1.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>OpenMPI</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p> </p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>OpenMPI installation package</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the YARN cluster and the client workstation that will access the YARN service.</p></td></tr></tbody></table></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Hamster-coreSetup">Hamster-core Setup</h3><p>Install the hamster-core package on the nodes in the YARN cluster and the client workstation that will access the YARN service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hamster/rpm/hamster-core-&lt;PHD_HAMSTER-CORE_VERSION&gt;-1.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-OpenMPISetup">OpenMPI Setup</h3><p>Install the OpenMPI package on the nodes in the YARN cluster and the client workstation that will access the YARN service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hamster/rpm/openmpi-&lt;PHD_OPENMPI_VERSION&gt;-1.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Hamster-rteSetup">Hamster-rte Setup</h3><p>Install the hamster-rte package on the nodes in the YARN cluster and the client workstation that will access the YARN service:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/hamster/rpm/hamster-rte-&lt;PHD_HAMSTER-RTE_VERSION&gt;-1.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Runninganexample">Running an example</h3><p>Run the following example to check that Hamster is working:</p><ol><li>Use the <code>hello_c</code> program in OpenMPI's examples folder to validate Hamster. This file needs to be placed on all nodes in the same directory.</li><li>Execute the following command:</li></ol><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ hamster -np 3 /absolute/path/to/hello_c </pre>
</div></div><p>The result should look like this:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">13/08/26 07:36:14 INFO cli.HamsterCli: tracking URL is: http://gphd-vm33:8088/proxy/application_1377213904799_0100/ 
13/08/26 07:36:16 INFO cli.HamsterCli: yarn application state transfered from [ACCEPTED] to [RUNNING] 
13/08/26 07:36:19 INFO cli.HamsterCli: yarn application state transfered from [RUNNING] to [FINISHED] 
13/08/26 07:36:19 INFO cli.HamsterCli: AppMaster is successfully finished.</pre>
</div></div><p>This creates an application on YARN, which negotiates with the YARN resource manager and executes MPI processes in the allocated resources.</p><p>You can copy the tracking URL to your browser to check your processes' real-time output. Note that this page only exists while the job is running.</p><p>Currently, MPI processes in Hamster redirect their output to files in the Hadoop log folder. You can find them in the directories specified by <code>yarn.nodemanager.log-dirs</code> in <code>yarn-site.xml</code> under <code>$HADOOP_CONF_DIR</code>.</p><p><span class="confluence-anchor-link"></span></p><h3 id="ManuallyInstallingPivotalHD2.0Stack-HamsterUsage">Hamster Usage</h3><p>Display Hamster command usage by running:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ hamster --help</pre>
</div></div><p>Most of the original OpenMPI command line options are supported by hamster, including:</p><div class="table-wrap"><table class="confluenceTable"><thead><tr><th class="confluenceTh"><div class="tablesorter-header-inner"><div class="tablesorter-header-inner">Option</div></div></th><th class="confluenceTh"><div class="tablesorter-header-inner"><div class="tablesorter-header-inner">Description</div></div></th></tr></thead><tbody><tr><td class="confluenceTd"><code>-c, -n, --n, -np, --np, -max-vm-size, --max-vm-size</code></td><td class="confluenceTd">Number of processes to run</td></tr><tr><td class="confluenceTd"><code>-bynode, --bynode</code></td><td class="confluenceTd">Whether to map and rank processes round-robin by node</td></tr><tr><td class="confluenceTd"><code>-byslot, --byslot</code></td><td class="confluenceTd">Whether to map and rank processes round-robin by slot</td></tr><tr><td class="confluenceTd"><code>-display-allocation, --display-allocation</code></td><td class="confluenceTd">Display the allocation being used by this job</td></tr><tr><td class="confluenceTd"><code>-display-devel-allocation, --display-devel-allocation</code></td><td class="confluenceTd">Display a detailed list (mostly intended for developers) of the allocation being used by this job</td></tr><tr><td class="confluenceTd"><code>-display-devel-map, --display-devel-map</code></td><td class="confluenceTd">Display a detailed process map (mostly intended for developers) just before launch</td></tr><tr><td class="confluenceTd"><code>-display-diffable-map, --display-diffable-map</code></td><td class="confluenceTd">Display a diffable process map (mostly intended for developers) just before launch.</td></tr><tr><td class="confluenceTd"><code>-display-map, --display-map</code></td><td class="confluenceTd">Display the process map just before launch.</td></tr><tr><td class="confluenceTd"><code>-display-topo, --display-topo</code></td><td class="confluenceTd">Display the topology as 
part of the process map (mostly intended for developers) just before launch.</td></tr><tr><td class="confluenceTd"><code>-h, --help</code></td><td class="confluenceTd">Show help message</td></tr><tr><td class="confluenceTd"><code>-path, --path</code></td><td class="confluenceTd">PATH to be used to look for executables to start processes</td></tr><tr><td class="confluenceTd"><code>-prefix, --prefix</code></td><td class="confluenceTd">Prefix where Open MPI is installed on remote nodes</td></tr><tr><td class="confluenceTd"><code>-preload-files, --preload-files, -s, --preload-binary</code></td><td class="confluenceTd">Preload the comma-separated list of files to the remote machine's current working directory before starting the remote process.</td></tr><tr><td class="confluenceTd"><code>-q, --quiet</code></td><td class="confluenceTd">Suppress helpful messages</td></tr><tr><td class="confluenceTd"><code>-report-pid, --report-pid</code></td><td class="confluenceTd">Printout pid on stdout [-], stderr [+], or a file [anything else]</td></tr><tr><td class="confluenceTd"><code>-report-uri, --report-uri</code></td><td class="confluenceTd">Printout URI on stdout [-], stderr [+], or a file [anything else]</td></tr><tr><td class="confluenceTd"><code>-v, --verbose</code></td><td class="confluenceTd">Be verbose</td></tr><tr><td class="confluenceTd"><code>-V, --version</code></td><td class="confluenceTd">Print version and exit</td></tr><tr><td class="confluenceTd"><code>-x</code></td><td class="confluenceTd">Export an environment variable, optionally specifying a value (for example, <code>"-x foo"</code> exports the environment variable foo and takes its value from the current environment; <code>"-x foo=bar"</code> exports the environment variable name foo and sets its value to <code>"bar"</code> in the started 
processes)</td></tr></tbody></table></div><p>These are some new command line options for hamster:</p><div class="table-wrap"><table class="confluenceTable"><thead><tr><th class="confluenceTh"><div class="tablesorter-header-inner"><div class="tablesorter-header-inner">Option</div></div></th><th class="confluenceTh"><div class="tablesorter-header-inner"><div class="tablesorter-header-inner">Description</div></div></th></tr></thead><tbody><tr><td class="confluenceTd"><code>-cpu, --cpu-per-proc</code></td><td class="confluenceTd">Specify how many v-cores are allocated to each MPI process</td></tr><tr><td class="confluenceTd"><code>-mem, --mem-per-proc</code></td><td class="confluenceTd">Specify how much memory (in MB) is allocated to each MPI process</td></tr><tr><td class="confluenceTd"><code>-max-at, --max-allocation-time</code></td><td class="confluenceTd">Maximum time to spend on allocation (in milliseconds). After the timeout, all allocated containers are returned and the job fails. We use min <code>{ $max-alloc, yarn.resourcemanager.rm.container-allocation.expiry-interval-ms}</code> as the actual expiry timeout</td></tr><tr><td class="confluenceTd"><code>-p, --policy</code></td><td class="confluenceTd">Policy for scheduling. Valid values: <code>{ default, compute-locality/cl }</code></td></tr><tr><td class="confluenceTd"><code>-preload-archives, --preload-archives</code></td><td class="confluenceTd">Preload the comma-separated list of archives to the remote machine's current working directory and unzip them before starting the remote process.</td></tr></tbody></table></div><h2 id="ManuallyInstallingPivotalHD2.0Stack-GraphLab">GraphLab</h2><p>GraphLab is a powerful new system for designing and implementing parallel algorithms in machine learning. It is a graph-based, high performance, distributed computation framework written in C++.  
It makes use of MPI and has its own programming model.</p><p>You can find more information about GraphLab here: <a class="external-link" href="http://graphlab.org/" rel="nofollow">GraphLab.org</a>.</p><p>Because GraphLab itself cannot run on YARN, we have integrated GraphLab into Hadoop using Hamster.</p><p>Following is an architecture diagram for GraphLab on Hadoop:</p><p><img class="confluence-embedded-image" data-image-src="attachments/67047907/68125265.png" src="attachments/67047907/68125265.png" width="400"/></p><h3 id="ManuallyInstallingPivotalHD2.0Stack-Prerequisites.9">Prerequisites</h3><ul><li><code>hadoop-libhdfs*.rpm</code> and JRE package <br/>If GraphLab algorithm's input/output are from/to HDFS, then <code>hadoop-libhdfs*.rpm</code> and JRE package are required. <br/>When installing PHD2.0, make sure that <code>hadoop-libhdfs*.rpm</code> is installed and  <code>$JAVA_HOME/jre/lib/amd64/server/libjvm.so</code> is found.</li><li>Hamster </li></ul><h3 id="ManuallyInstallingPivotalHD2.0Stack-GraphLabRPMPackages">GraphLab RPM Packages</h3><p>GraphLab consists of the following graphlab <code>rpm</code> package:</p><div class="table-wrap"><table class="confluenceTable"><tbody><tr><td class="confluenceTd" colspan="2"><p><code> <strong>graphlab-&lt;PHD_GraphLab_VERSION&gt;-1.x86_64.rpm</strong> </code></p></td></tr><tr><td class="confluenceTd"><p><strong>Type</strong></p></td><td class="confluenceTd"><p>GraphLab</p></td></tr><tr><td class="confluenceTd"><p><strong>Requires</strong></p></td><td class="confluenceTd"><p>Hamster, hadoop-libhdfs*.rpm, jre package </p></td></tr><tr><td class="confluenceTd"><p><strong>Description</strong></p></td><td class="confluenceTd"><p>GraphLab installation package</p></td></tr><tr><td class="confluenceTd"><p><strong>Install on Nodes</strong></p></td><td class="confluenceTd"><p>Every node in the YARN cluster and the client workstation that will access the YARN service.</p></td></tr></tbody></table></div><p><br/> <strong>GraphLab 
Setup</strong></p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: bash; gutter: false" style="font-size:12px;">$ sudo rpm -ivh working_dir/graphlab/rpm/graphlab-&lt;PHD_GraphLab_VERSION&gt;-1.x86_64.rpm</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Runninganexample.1">Running an example</h3><p>Following is an example to test that GraphLab is working:</p><p>Create a <code>connected_component.ini</code> file that contains the following:</p><p style="margin-left: 30.0px;"><code>1 2 4.0</code> <br/> <code>2 3 1.0</code> <br/> <code>3 4 5.0</code> <br/> <code>4 5 2.0</code> <br/> <code>5 3 3.0</code></p><p>Run the following commands to place this file in HDFS and create a folder in HDFS for writing output:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">sudo -u hdfs hadoop fs -mkdir /graphlab
sudo -u hdfs hadoop fs -copyFromLocal /path/to/connected_component.ini /graphlab/connected_component.ini
sudo -u hdfs hadoop fs -mkdir -p /graphlab/connected_component/output
sudo -u hdfs hadoop fs -chmod 777 /graphlab/connected_component/output
</pre>
</div></div><p>Run the following command to execute a GraphLab job:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">hamster -np 2 /${graphlab_installation_dir}/graph_analytics/connected_component  --saveprefix=hdfs://${hdfs_hostname}:${hdfs_port}/graphlab/connected_component/output/ --graph=hdfs://${hdfs_hostname}:${hdfs_port}/graphlab/connected_component.ini --format=tsv</pre>
</div></div><p>You should see the following log:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">&lt;...Trimmed...&gt;
14/01/27 10:13:12 INFO cli.HamsterCli: tracking URL is: http:// &lt;...Trimmed...&gt;
14/01/27 10:13:15 INFO cli.HamsterCli: yarn application state transfered from [ACCEPTED] to [RUNNING]
14/01/27 10:13:22 INFO cli.HamsterCli: yarn application state transfered from [RUNNING] to [FINISHED]
14/01/27 10:13:22 INFO cli.HamsterCli: AppMaster is successfully finished</pre>
</div></div><h3 id="ManuallyInstallingPivotalHD2.0Stack-Usage.9">Usage</h3><p>You can find usage information for GraphLab's toolkits here: <a class="external-link" href="http://docs.graphlab.org/toolkits.html" rel="nofollow">http://docs.graphlab.org/toolkits.html</a>.</p><p>Note that here we are using Hamster as the MPI runtime behind GraphLab. Therefore you should use <code>hamster</code> instead of <code>mpirun</code> or <code>mpiexec</code> to submit jobs to the cluster.</p><p>To display <code>hamster</code> command usage, run <code>hamster -h</code> or refer to <a href="#ManuallyInstallingPivotalHD2.0Stack-HamsterUsage">hamster usage</a> in this documentation.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-YARNconfigurationsforbetterusingGraphLabonHamster">YARN configurations for better using GraphLab on Hamster</h3><h4 id="ManuallyInstallingPivotalHD2.0Stack-Resourceallocation/monitoring">Resource allocation / monitoring</h4><p>In YARN, both virtual and physical memory usage for processes running on node managers are checked. If your program uses more memory than requested, it is killed by the node manager. You can change the memory/CPU limit for your processes by specifying -mem (--mem-per-proc) and -cpu (--cpu-per-proc). For example, if you want your program to use 2G of memory and 2 cores, you can use the following command to execute your job:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">hamster -mem 2048 -cpu 2 -np N &lt;your-program and parameters&gt; </pre>
</div></div><p>By default, -mem is 1024 (in MB) and -cpu is 1. However, these options alone are not enough: YARN itself has several properties that affect resource allocation and monitoring, all of which are set in yarn-site.xml in $HADOOP_CONF_DIR.</p><ul><li><strong>yarn.scheduler.maximum-allocation-mb and yarn.scheduler.maximum-allocation-vcores</strong> <br/> These two options limit YARN's maximum resource allocation for each launched process. By default, they are 8192 (8 GB of memory) and 32 (cores). If your process needs more than these limits, you need to increase their values. GraphLab consumes a lot of memory, so <strong>we suggest setting the maximum allocation memory in YARN to at least 16 GB</strong>. Otherwise, you may find that your program is frequently killed by node managers; these kills are recorded in the node manager logs.</li><li><strong>yarn.nodemanager.vmem-check-enabled and yarn.nodemanager.vmem-pmem-ratio</strong> <br/> The first option indicates whether virtual memory usage is checked. Some programs use a lot of virtual memory but very little physical memory (GraphLab is an example). If you think it is unnecessary to check virtual memory, you can set this option to <strong>false</strong> to disable the check (the default is true). The second option is the ratio of virtual memory to physical memory; the default is 2.1. For example, if you specify 2 GB of memory for your processes (with hamster -mem 2048 ...), each process is limited to 2 GB of physical memory and 2.1 * 2 GB = 4.2 GB of virtual memory. A process is killed if it exceeds <strong>either</strong> 2 GB of physical memory or 4.2 GB of virtual memory. We suggest setting a higher ratio to reduce the number of processes killed by the node manager for using too much virtual memory.</li></ul><h4 id="ManuallyInstallingPivotalHD2.0Stack-FetchlogforHamsterjob">Fetch log for Hamster job</h4><p>A traditional OpenMPI program has a feature that allows you to get logs when jobs are running. 
In Hamster 1.0 we don't support this because we launch MPI processes in a different way; however, you can still get all log files after the job has finished.</p><p>We recommend that you set <code>yarn.log-aggregation-enable</code> to <code>true</code> in <code>yarn-site.xml</code> (by default this is disabled). If this is left disabled, fetching logs for Hamster jobs is more complicated, as you have to use the YARN web server to get your logs, as you would for other applications (for example, map-reduce).</p><p>When <code>yarn.log-aggregation-enable</code> is set to <code>true</code>, you need to be aware that the parent directory of <code>yarn.nodemanager.remote-app-log-dir</code> in HDFS should have write permission for the <code>yarn</code> user. By default, <code>yarn.nodemanager.remote-app-log-dir</code> is set to <code>/tmp/logs</code>.</p><p>Once you have set the above properties, run the following command to get the log after your job has finished:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">yarn logs -applicationId &lt;application ID&gt; -appOwner &lt;Application Owner&gt;</pre>
</div></div><p>Note that if you execute the <code>yarn logs</code> command as a different user than the one that submitted the Hamster job, <code>-appOwner</code> should be set to the user name that was used to submit the job.</p><h3 id="ManuallyInstallingPivotalHD2.0Stack-TroubleShooting">Troubleshooting</h3><p><strong>Q:  When I run <code>rpm -ivh hamster-rte*.rpm</code>, the following error is thrown:</strong> <br/> <code>   </code> <code> error: Failed dependencies:  pkgconfig(zlib) is needed by avro-1.7.5-1.x86_64 </code></p><p>A:  You need to run <code>yum install zlib-devel.x86_64 zlib.x86_64</code> first.</p><p> </p><p><strong>Q: I'm unable to see Hadoop logs.</strong></p><p>A: You need to add the following property in <code>yarn-site.xml</code>:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;"> &lt;property&gt;
    &lt;description&gt;&lt;/description&gt;
    &lt;name&gt;yarn.nodemanager.delete.debug-delay-sec&lt;/name&gt;
    &lt;value&gt;1800&lt;/value&gt;
  &lt;/property&gt;</pre>
</div></div><p>Then restart resource manager and nodemanager.</p><p><strong>Q: When I run <code>hamster -np 3 /absolute/path/to/hello_c</code>, the following error is thrown:</strong></p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=root, access=WRITE, inode="/user":hdfs:supergroup:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:234)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:214)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:158)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5380)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5362)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkAncestorAccess(FSNamesystem.java:5336)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirsInternal(FSNamesystem.java:3583)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirsInt(FSNamesystem.java:3553)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3525)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:745)</pre>
</div></div><p>A: Run the following command:</p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">sudo -u hdfs hadoop fs -chmod 777 /user
hadoop fs -mkdir /user/root</pre>
</div></div><p><br/> <br/> <strong>Q: I see the following in the YARN logs:</strong></p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">14/02/17 09:52:30 ERROR event.HamsterEventHandler: exception when launch HNP process
java.io.IOException: Cannot run program "mpirun": error=13, Permission denied
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
at java.lang.Runtime.exec(Runtime.java:615)
at java.lang.Runtime.exec(Runtime.java:526)
at com.pivotal.hamster.appmaster.hnp.DefaultHnpLauncher$1.run(DefaultHnpLauncher.java:84)
at java.lang.Thread.run(Thread.java:722)
Caused by: java.io.IOException: error=13, Permission denied
at java.lang.UNIXProcess.forkAndExec(Native Method)
at java.lang.UNIXProcess.&lt;init&gt;(UNIXProcess.java:135)
at java.lang.ProcessImpl.start(ProcessImpl.java:130)
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1021)
... 4 more</pre>
</div></div><p>A: Check if the node that is running the hamster application has openmpi installed.</p><p><br/> <strong>Q: I see the following information in logs:</strong></p><div class="code panel pdl" style="border-width: 1px;"><div class="codeContent panelContent pdl">
<pre class="theme: Confluence; brush: java; gutter: false" style="font-size:12px;">LogType: stderr
LogLength: 48
Log Contents:
/bin/bash: /bin/java: No such file or directory
LogType: stdout
LogLength: 0
Log Contents:</pre>
</div></div><p>A: Make sure <code>$JAVA_HOME</code> is set:</p><p style="margin-left: 30.0px;">Run this command: <code>echo $JAVA_HOME</code>. You should see something like this:</p><p style="margin-left: 30.0px;"><code>/usr/java/latest<br/> <br/> </code></p><p><strong>Q: I see the following information in logs:</strong></p><p style="margin-left: 30.0px;"><code>ERROR: fiber_control.cpp(launch:229): Check failed: b&lt;nworkers [1 &lt; 1]</code></p><p>A: This is a limitation; run the job on a machine that has more than one core.</p>
</div></div>
            </div><!-- end of content-->
            
            
          </div><!-- end of container -->
        </div><!--end of container-fluid-->
      </div><!--end of main-wrap-->

      <div class="site-footer desktop-only">
          <div class="container-fluid">
              <div class="site-footer-links">
                  <span class="version"><a href='/'>Pivotal Documentation</a></span>
                  <span>&copy;
                      <script>
                          // Writes the current four-digit year inline after the copyright
                          // symbol. The Date is used directly rather than stored in a
                          // top-level variable, so no global is added to window.
                          document.write(new Date().getFullYear());
                      </script>
                      <a href='http://gopivotal.com'>Pivotal Software</a> Inc. All Rights Reserved.
                  </span>
              </div>
          </div>
      </div>

      <script type="text/javascript">
          // Asynchronously loads the Marketo Munchkin library by appending a
          // <script> element to <head>, then calls Munchkin.init once it is ready.
          (function() {
              var didInit = false;

              // Load callback. Both onload and onreadystatechange can fire for the
              // same script, so didInit ensures Munchkin.init runs only once.
              function initMunchkin() {
                  // The readystatechange path may fire without the library having
                  // been defined (for example, after a failed download); skip the
                  // call rather than throw a ReferenceError.
                  if (didInit || typeof Munchkin === 'undefined') {
                      return;
                  }
                  didInit = true;
                  Munchkin.init('625-IUJ-009');
              }

              var s = document.createElement('script');
              s.type = 'text/javascript';
              s.async = true;
              // Always fetch over HTTPS. Deriving the scheme from the page protocol
              // produced an unloadable file:// URL when the page was opened from
              // disk and a cleartext request on http pages.
              s.src = 'https://munchkin.marketo.net/munchkin.js';
              // Fallback for browsers that report script loading through readyState.
              s.onreadystatechange = function() {
                  if (this.readyState == 'complete' || this.readyState == 'loaded') {
                      initMunchkin();
                  }
              };
              s.onload = initMunchkin;
              document.getElementsByTagName('head')[0].appendChild(s);
          })();
      </script>
  </div><!--end of viewport-->
  <div id="scrim"></div>
</body>
</html>