forked from google-coral/libedgetpu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreal_time_dma_scheduler.h
201 lines (170 loc) · 7.25 KB
/
real_time_dma_scheduler.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DARWINN_DRIVER_REAL_TIME_DMA_SCHEDULER_H_
#define DARWINN_DRIVER_REAL_TIME_DMA_SCHEDULER_H_
#include <condition_variable> // NOLINT
#include <list>
#include <memory>
#include <mutex> // NOLINT
#include <queue>
#include <unordered_map>
#include <vector>
#include "api/driver.h"
#include "api/package_reference.h"
#include "api/timing.h"
#include "api/watchdog.h"
#include "driver/dma_info.h"
#include "driver/dma_scheduler.h"
#include "driver/package_registry.h"
#include "driver/single_queue_dma_scheduler.h"
#include "driver/tpu_request.h"
#include "driver_shared/time_stamper/time_stamper.h"
#include "port/errors.h"
#include "port/ptr_util.h"
#include "port/status.h"
#include "port/std_mutex_lock.h"
#include "port/thread_annotations.h"
namespace platforms {
namespace darwinn {
namespace driver {
// Manages DMA with best-effort QoS. Works as a gating function to the
// underlying single queue DMA.
//
// All DMA-level operations are forwarded to an owned SingleQueueDmaScheduler.
// On top of that, this class keeps per-executable timing information
// (guarded by |mutex_|) that Submit() consults when real-time mode is on.
class RealTimeDmaScheduler : public DmaScheduler {
 public:
  RealTimeDmaScheduler() = delete;

  // Builds the scheduler. |watchdog| is handed over to the backing
  // SingleQueueDmaScheduler; |time_stamper| is kept by this object.
  RealTimeDmaScheduler(std::unique_ptr<api::Watchdog> watchdog,
                       std::unique_ptr<driver_shared::TimeStamper> time_stamper)
      : backing_scheduler_(
            gtl::MakeUnique<SingleQueueDmaScheduler>(std::move(watchdog))),
        time_stamper_(std::move(time_stamper)) {}

  ~RealTimeDmaScheduler() override = default;

  // Implements DmaScheduler interfaces.
  util::Status Open() override;
  util::Status Close(api::Driver::ClosingMode mode) override;

  // Submits a request. Forwards everything to the backing DMA scheduler (Single
  // queue DMA) in normal mode; accepts the request if real-time constraint can
  // be met, i.e. not impacting other service guarantees, otherwise rejects the
  // request in real-time mode. For details, please refer to
  // go/darwinn-qos-design.
  util::Status Submit(std::shared_ptr<TpuRequest> request) override
      LOCKS_EXCLUDED(mutex_);

  // DmaScheduler interface. Defined out of line.
  util::Status NotifyRequestCompletion() override;

  // DmaScheduler interface. The following two simply forward to the backing
  // scheduler.
  util::Status CancelPendingRequests() override {
    return backing_scheduler_->CancelPendingRequests();
  }
  util::Status WaitActiveRequests() override {
    return backing_scheduler_->WaitActiveRequests();
  }

  // Implements lower level DMA routines. They are directly forwarded to the
  // backing scheduler.
  util::StatusOr<DmaDescriptorType> PeekNextDma() const override {
    return backing_scheduler_->PeekNextDma();
  }
  util::StatusOr<DmaInfo *> GetNextDma() override {
    return backing_scheduler_->GetNextDma();
  }
  util::Status NotifyDmaCompletion(DmaInfo *dma_info) override {
    return backing_scheduler_->NotifyDmaCompletion(dma_info);
  }
  bool IsEmpty() const override { return backing_scheduler_->IsEmpty(); }
  int64 MaxRemainingCycles() const override {
    return backing_scheduler_->MaxRemainingCycles();
  }
  util::StatusOr<std::shared_ptr<TpuRequest>> GetOldestActiveRequest()
      const override {
    return backing_scheduler_->GetOldestActiveRequest();
  }

  // Enters/leaves real-time mode. Note timing is preserved across toggling.
  void SetRealtimeMode(bool on) LOCKS_EXCLUDED(mutex_);

  // Clears all timing information, i.e. drops every registered executable's
  // timing entry.
  void ResetTiming() LOCKS_EXCLUDED(mutex_) {
    StdMutexLock lock(&mutex_);
    inference_timings_.clear();
  }

  // Sets expected arrival rates, max execution time and tolerance (in
  // milliseconds) for an executable reference.
  // -1 in any of the fields of api::Timing means keeping that individual value
  // unchanged but updating the rest.
  util::Status SetExecutableTiming(const ExecutableReference *executable,
                                   const api::Timing &timing)
      LOCKS_EXCLUDED(mutex_);

  // Removes timing information for a registered model.
  util::Status RemoveExecutableTiming(const ExecutableReference *executable)
      LOCKS_EXCLUDED(mutex_);

  // Returns the timing configuration (FPS, max execution time and tolerance)
  // of a given executable reference, or an error status.
  util::StatusOr<api::Timing> GetExecutableTiming(
      const ExecutableReference *executable) const LOCKS_EXCLUDED(mutex_);

  // Returns the arrival time of last request for a given executable reference.
  int64 GetLastArrivalTime(const ExecutableReference *executable) const
      LOCKS_EXCLUDED(mutex_);

  // Convenience accessors. Each one looks up the timing through
  // GetExecutableTiming() and returns a single field of it; any error from
  // the lookup is propagated unchanged.

  // Returns the |fps| field of the executable's timing.
  util::StatusOr<int> GetExecutableFPS(
      const ExecutableReference *executable) const {
    ASSIGN_OR_RETURN(const auto timing, GetExecutableTiming(executable));
    return timing.fps;
  }

  // Returns the |max_execution_time_ms| field of the executable's timing.
  util::StatusOr<int> GetExecutableMaxExecutionTimeMs(
      const ExecutableReference *executable) const {
    ASSIGN_OR_RETURN(const auto timing, GetExecutableTiming(executable));
    return timing.max_execution_time_ms;
  }

  // Returns the |tolerance_ms| field of the executable's timing.
  util::StatusOr<int> GetExecutableToleranceMs(
      const ExecutableReference *executable) const {
    ASSIGN_OR_RETURN(const auto timing, GetExecutableTiming(executable));
    return timing.tolerance_ms;
  }

 private:
  // Tracks timing requirements and statistics for a registered executable.
  // See go/darwinn-qos-design on the algorithm.
  struct TimingInternal : public api::Timing {
    TimingInternal() : api::Timing() {}
    explicit TimingInternal(api::Timing timing) : api::Timing{timing} {}

    // Returns max execution time in microseconds.
    // The value is widened to 64 bits *before* scaling so that the product is
    // never computed in a narrower type (which could overflow for large
    // millisecond values if the field is a plain int).
    int64 max_execution_time_us() const {
      return static_cast<int64>(max_execution_time_ms) * 1000;
    }

    // Returns tolerance in microseconds. Widened before scaling for the same
    // reason as max_execution_time_us().
    int64 tolerance_us() const {
      return static_cast<int64>(tolerance_ms) * 1000;
    }

    // Returns per frame time in microseconds, or error when FPS == 0.
    // Uses 64-bit integer division (truncating toward zero) instead of a
    // floating point division whose result would be implicitly narrowed.
    util::StatusOr<int64> frame_time_us() const {
      if (fps == 0) {
        return util::InvalidArgumentError(
            "Can't calculate frame time of 0 FPS");
      }
      return static_cast<int64>(1000000) / fps;
    }

    // Returns if a timing configuration is real-time, i.e. has a positive FPS.
    bool HasRealTimeRequirements() const { return fps > 0; }

    // Bookkeeping timestamps; the suffix suggests microseconds.
    int64 last_arrival_time_us{0};
    int64 last_completion_time_us{0};
  };

  // The underlying single queue DMA scheduler. Thread-safe by itself.
  std::unique_ptr<SingleQueueDmaScheduler> backing_scheduler_;

  // Time-stamper for tracking submission and completion time for requests.
  std::unique_ptr<driver_shared::TimeStamper> time_stamper_;

  // Tracks registered executables.
  std::unordered_map<const ExecutableReference *, TimingInternal>
      inference_timings_ GUARDED_BY(mutex_);

  // Real-time mode? In non-real-time mode, this scheduler behaves the same as
  // the underlying scheduler (single queue DMA).
  bool real_time_mode_ GUARDED_BY(mutex_){false};

  // Currently booked time by all scheduled inferences.
  int64 time_booked_us_ GUARDED_BY(mutex_){0};

  // Guards the inference timings, the real-time mode flag and the booked
  // time. Mutable so that const accessors can lock it.
  mutable std::mutex mutex_;
};
} // namespace driver
} // namespace darwinn
} // namespace platforms
#endif  // DARWINN_DRIVER_REAL_TIME_DMA_SCHEDULER_H_