-- import "github.com/Zemanta/mrgob/runner"
var (
ErrMissingJobPath = fmt.Errorf("Missing job path")
ErrMissingInput = fmt.Errorf("Missing input")
ErrMissingOutput = fmt.Errorf("Missing output")
)
var (
HadoopStatusIdle HadoopStatus = 0
HadoopStatusRunning HadoopStatus = 1
HadoopStatusSuccess HadoopStatus = 2
HadoopStatusFailed HadoopStatus = -1
ErrNotRunning = fmt.Errorf("Command not running")
ErrRunning = fmt.Errorf("Application running")
ErrStarted = fmt.Errorf("Application can only be run once")
ErrMissingApplicationId = fmt.Errorf("Missing application id")
ErrMissingHadoopProvider = fmt.Errorf("Missing Hadoop provider")
)
func ExecOnCluster(retries int, arguments ...string) error
func SetDefaultHadoopProvider(p provider.HadoopProvider)
type HadoopApplicationLogs struct {
Raw string
ContainerLogs []*HadoopContainerLogs
}
func (l *HadoopApplicationLogs) AppLog() string
func (l *HadoopApplicationLogs) StdErr() string
func (l *HadoopApplicationLogs) StdOut() string
func (l *HadoopApplicationLogs) String() string
func (l *HadoopApplicationLogs) SysLog() string
type HadoopApplicationStatus struct {
App struct {
AllocatedMB int `json:"allocatedMB"`
AllocatedVCores int `json:"allocatedVCores"`
AmContainerLogs string `json:"amContainerLogs"`
AmHostHTTPAddress string `json:"amHostHttpAddress"`
ApplicationTags string `json:"applicationTags"`
ApplicationType string `json:"applicationType"`
ClusterID int `json:"clusterId"`
Diagnostics string `json:"diagnostics"`
ElapsedTime int `json:"elapsedTime"`
FinalStatus string `json:"finalStatus"`
FinishedTime int `json:"finishedTime"`
ID string `json:"id"`
MemorySeconds int `json:"memorySeconds"`
Name string `json:"name"`
NumAMContainerPreempted int `json:"numAMContainerPreempted"`
NumNonAMContainerPreempted int `json:"numNonAMContainerPreempted"`
PreemptedResourceMB int `json:"preemptedResourceMB"`
PreemptedResourceVCores int `json:"preemptedResourceVCores"`
Progress float64 `json:"progress"`
Queue string `json:"queue"`
RunningContainers int `json:"runningContainers"`
StartedTime int `json:"startedTime"`
State string `json:"state"`
TrackingUI string `json:"trackingUI"`
TrackingURL string `json:"trackingUrl"`
User string `json:"user"`
VcoreSeconds int `json:"vcoreSeconds"`
} `json:"app"`
}
type HadoopCommand struct {
}
func NewMapReduce(c *MapReduceConfig) (*HadoopCommand, error)
func NewRawMapReduce(arguments ...string) *HadoopCommand
func (hc *HadoopCommand) ApplicationId() (string, error)
func (hc *HadoopCommand) CmdOutput() (stdOut string, stdErr string, cmdErr error)
func (hc *HadoopCommand) FetchApplicationLogs() (*HadoopApplicationLogs, error)
func (hc *HadoopCommand) FetchApplicationStatus() (*HadoopApplicationStatus, error)
func (hc *HadoopCommand) FetchDebugData() (*HadoopDebugData, error)
func (hc *HadoopCommand) FetchJobCounters() (HadoopJobCounters, error)
func (hc *HadoopCommand) Run() HadoopStatus
func (hc *HadoopCommand) SetRetries(n int)
func (hc *HadoopCommand) Status() HadoopStatus
func (hc *HadoopCommand) Tries() []*HadoopRun
func (hc *HadoopCommand) Wait() HadoopStatus
type HadoopContainerLogs struct {
Container string
Host string
StdOut string
StdErr string
SysLog string
AppLog string
}
type HadoopDebugData struct {
Logs *HadoopApplicationLogs
Counters HadoopJobCounters
Status *HadoopApplicationStatus
StdOut string
StdErr string
CmdErr error
}
type HadoopJobCounterData struct {
Name string `json:"name"`
MapCounterValue int `json:"mapCounterValue"`
ReduceCounterValue int `json:"reduceCounterValue"`
TotalCounterValue int `json:"totalCounterValue"`
}
type HadoopJobCounters map[string]HadoopJobCountersGroup
func (c HadoopJobCounters) AppCounters() HadoopJobCountersGroup
type HadoopJobCountersGroup map[string]HadoopJobCounterData
func (c HadoopJobCountersGroup) String() string
type HadoopRun struct {
}
func (hr *HadoopRun) ApplicationId() (string, error)
func (hr *HadoopRun) CmdOutput() (stdOut string, stdErr string, cmdErr error)
func (hr *HadoopRun) FetchApplicationLogs() (*HadoopApplicationLogs, error)
func (hr *HadoopRun) FetchApplicationStatus() (*HadoopApplicationStatus, error)
func (hr *HadoopRun) FetchDebugData() (*HadoopDebugData, error)
func (hr *HadoopRun) FetchJobCounters() (HadoopJobCounters, error)
type HadoopStatus int
type MapReduceConfig struct {
// Job name.
Name string
// Number of reducers.
ReduceTasks int
// Number of mappers.
MapTasks int
// S3 or HDFS path to the executable job implementing the "Init*Job" interface.
JobPath string
// Job configuration that will be made available in mapper and reducer jobs.
JobConfig interface{}
// List of input files.
Input []string
// Output directory.
Output string
// Other custom -D properties passed to the job.
CustomProperties map[string]string
// Other files that will be downloaded next to the executable before running the job.
AdditionalFiles []string
// Environment options passed to the mapreduce jobs.
Env map[string]string
}