-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Copy pathconntrack_types.h
235 lines (208 loc) · 8.1 KB
/
conntrack_types.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// Project Calico BPF dataplane programs.
// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
#ifndef __CALI_CONNTRACK_TYPES_H__
#define __CALI_CONNTRACK_TYPES_H__
// Connection tracking.
/*
 * Key type of the conntrack map: the protocol plus the two endpoints
 * (A and B) of a flow.
 */
struct calico_ct_key {
	__u32 protocol;
	ipv46_addr_t addr_a;	/* network byte order */
	ipv46_addr_t addr_b;	/* network byte order */
	__u16 port_a;		/* host byte order */
	__u16 port_b;		/* host byte order */
};
/* Kind of a conntrack entry. */
enum cali_ct_type {
	/* Non-NATted entry. */
	CALI_CT_TYPE_NORMAL = 0x00,

	/*
	 * Forward entry for a DNATted flow, keyed on orig src/dst.
	 * Points to the reverse entry.
	 */
	CALI_CT_TYPE_NAT_FWD = 0x01,

	/*
	 * "Reverse" entry for a NATted flow; contains the NAT and tracking
	 * information.
	 */
	CALI_CT_TYPE_NAT_REV = 0x02,
};
/*
 * Conntrack entry flags. The values span 16 bits; ct_value_set_flags()
 * stores the low byte in `flags` and the high byte in `flags2`.
 */
#define CALI_CT_FLAG_NAT_OUT		0x01
/* Entry into the tunnel on the fwd node when DSR is in use. */
#define CALI_CT_FLAG_DSR_FWD		0x02
/* Entry into the tunnel on the fwd node. */
#define CALI_CT_FLAG_NP_FWD		0x04
/* Traffic that should pass through the host IP stack. */
#define CALI_CT_FLAG_SKIP_FIB		0x08
/* Reserved. */
#define CALI_CT_FLAG_RES_0x10		0x10
/* Reserved. */
#define CALI_CT_FLAG_RES_0x20		0x20
/* Traffic from an external client to a local service. */
#define CALI_CT_FLAG_EXT_LOCAL		0x40
/* Connection first seen on the service veth. */
#define CALI_CT_FLAG_VIA_NAT_IF		0x80
/* src->dst is the B->A leg. */
#define CALI_CT_FLAG_BA			0x100
/* Entry results from host port collision resolution. */
#define CALI_CT_FLAG_HOST_PSNAT		0x200
/* Connection from a pod via a service to itself. */
#define CALI_CT_FLAG_SVC_SELF		0x400
/* Connection that was turned around when accessing a nodeport on a local IP. */
#define CALI_CT_FLAG_NP_LOOP		0x800
/* Connection from the local host to a remote backend of a nodeport. */
#define CALI_CT_FLAG_NP_REMOTE		0x1000
/* Connection from a client which is excluded from DSR. */
#define CALI_CT_FLAG_NP_NO_DSR		0x2000
// State kept for one direction ("leg") of a tracked connection. struct
// calico_ct_value embeds two of these, a_to_b and b_to_a.
struct calico_ct_leg {
__u64 bytes; // presumably bytes seen on this leg -- TODO confirm
__u32 packets; // presumably packets seen on this leg -- TODO confirm
__u32 seqno;
// Six one-bit flags declared on a __u32 base type.
// NOTE(review): syn/ack/fin/rst_seen presumably record the TCP flags
// observed on this leg -- confirm against the code that sets them.
__u32 syn_seen:1;
__u32 ack_seen:1;
__u32 fin_seen:1;
__u32 rst_seen:1;
__u32 approved:1; // the calico_ct_result_type comments describe legs as being "approved"
__u32 opener:1;
__u32 ifindex; /* For a CT leg where packets ingress through an interface towards
* the host, this is the ingress interface index. For a CT leg
* where packets originate _from_ the host, it's CT_INVALID_IFINDEX
* (0).
*/
};
// ifindex value recorded when there is no ingress interface, i.e. for
// conntrack legs/state whose packets originate from the host.
#define CT_INVALID_IFINDEX 0
// Value type of the conntrack map. A fixed 24-byte header is followed by a
// union whose layout depends on the entry type (see enum cali_ct_type).
// The total size is asserted at compile time: 88 bytes, or 128 bytes when
// IPVER6 is defined. Do not reorder or resize members without updating
// the padding and the size assertions.
// NOTE(review): the numeric offsets in the trailing comments from tun_ip
// onwards appear to assume a 4-byte ipv46_addr_t (non-IPVER6 build) -- confirm.
struct calico_ct_value {
__u64 created; // presumably a creation timestamp -- TODO confirm
__u64 last_seen; // 8
__u8 type; // 16
__u8 flags; // low byte of the CALI_CT_FLAG_* bits (see ct_value_set_flags)
// Important to use explicit padding, otherwise the compiler can decide
// not to zero the padding bytes, which upsets the verifier. Worse than
// that, debug logging often prevents such optimisation resulting in
// failures when debug logging is compiled out only :-).
__u8 pad0[5];
__u8 flags2; // high byte of the CALI_CT_FLAG_* bits (see ct_value_set_flags)
union {
// CALI_CT_TYPE_NORMAL and CALI_CT_TYPE_NAT_REV.
struct {
struct calico_ct_leg a_to_b; // 24
struct calico_ct_leg b_to_a; // 48
// CALI_CT_TYPE_NAT_REV
ipv46_addr_t tun_ip; // 72
ipv46_addr_t orig_ip; // 76
__u16 orig_port; // 80
__u16 orig_sport; // 82
ipv46_addr_t orig_sip; // 84
};
// CALI_CT_TYPE_NAT_FWD; key for the CALI_CT_TYPE_NAT_REV entry.
struct {
struct calico_ct_key nat_rev_key; // 24
__u16 nat_sport;
// Explicit filler after nat_sport; its length differs per IP version.
#ifdef IPVER6
__u8 pad2[60];
#else
__u8 pad2[46];
#endif
};
};
/* 64bit aligned by here */
};
/*
 * Never meant to be called: exists only to host compile-time checks that
 * pin the size of struct calico_ct_value (88 bytes by default, 128 bytes
 * when IPVER6 is defined). The unused-local-typedef warning is silenced
 * around the assertions.
 */
static CALI_BPF_INLINE void __xxx_compile_asserts(void) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-local-typedef"
#ifndef IPVER6
	COMPILE_TIME_ASSERT((sizeof(struct calico_ct_value) == 88))
#else
	COMPILE_TIME_ASSERT((sizeof(struct calico_ct_value) == 128))
#endif
#pragma clang diagnostic pop
}
/*
 * ct_value_set_flags - OR a set of flag bits into a conntrack value.
 * @v: pointer to an object with `flags` and `flags2` members
 *     (e.g. struct calico_ct_value)
 * @f: flag bits to set; only the low 16 bits are stored
 *
 * Bits 0-7 of @f are ORed into v->flags and bits 8-15 into v->flags2.
 * Bits that are already set are never cleared.
 *
 * Both arguments are evaluated exactly once, so expressions with side
 * effects can be passed safely.
 */
#define ct_value_set_flags(v, f) do {					\
	__typeof__(v) ct_set_v_ = (v);					\
	unsigned int ct_set_f_ = (f);					\
									\
	ct_set_v_->flags |= (ct_set_f_ & 0xff);				\
	ct_set_v_->flags2 |= ((ct_set_f_ >> 8) & 0xff);			\
} while(0)
/*
 * ct_value_get_flags - read the 16-bit flag set of a conntrack value.
 * @v: pointer to an object with `flags` and `flags2` members
 *     (e.g. struct calico_ct_value)
 *
 * Evaluates to a __u16 with v->flags in bits 0-7 and v->flags2 in
 * bits 8-15, i.e. the inverse of ct_value_set_flags().
 *
 * @v is evaluated exactly once, and the temporaries use names that are
 * unlikely to collide with identifiers appearing in the argument.
 */
#define ct_value_get_flags(v) ({					\
	__typeof__(v) ct_get_v_ = (v);					\
	__u16 ct_get_flags_ = ct_get_v_->flags | (ct_get_v_->flags2 << 8); \
									\
	ct_get_flags_;							\
})
// Parameters describing a packet for a conntrack lookup: protocol,
// source/destination address and port, plus a pointer to the TCP header.
// NOTE(review): DECLARE_IP_ADDR is defined elsewhere; presumably it declares
// an address member of the appropriate IP version named after its
// argument -- confirm. Presumably tcp is only meaningful for TCP packets.
struct ct_lookup_ctx {
__u8 proto;
DECLARE_IP_ADDR(src);
DECLARE_IP_ADDR(dst);
__u16 sport;
__u16 dport;
struct tcphdr *tcp;
};
// Parameters for creating a conntrack entry. Carries both the current and
// the "orig" source/destination addresses and ports of the flow.
// NOTE(review): presumably the orig_* members hold the pre-NAT values and
// flags holds CALI_CT_FLAG_* bits -- confirm against the callers.
struct ct_create_ctx {
ipv46_addr_t orig_src;
ipv46_addr_t src;
ipv46_addr_t orig_dst;
ipv46_addr_t dst;
__u16 sport;
__u16 dport;
__u16 orig_dport;
__u16 orig_sport;
struct tcphdr *tcp;
ipv46_addr_t tun_ip; /* is set when the packet arrives through the NP tunnel.
* It is also set on the first node when we create the
* initial CT entry for the tunneled traffic. */
__u16 flags;
__u8 proto;
__u8 __pad; // explicit padding byte after proto
enum cali_ct_type type; // kind of entry to create
bool allow_return;
};
// Declaration of the conntrack BPF map. Only the first argument depends on
// the IP version being compiled (cali_v6_ct with IPVER6, cali_v4_ct
// otherwise); the remaining CALI_MAP_NAMED() arguments are shared: a
// BPF_MAP_TYPE_HASH map from struct calico_ct_key to struct calico_ct_value,
// with 512000 and BPF_F_NO_PREALLOC as the trailing arguments.
// NOTE(review): CALI_MAP_NAMED is defined elsewhere; presumably 3 is the map
// version and 512000 the maximum number of entries -- confirm.
#ifdef IPVER6
CALI_MAP_NAMED(cali_v6_ct, cali_ct, 3,
#else
CALI_MAP_NAMED(cali_v4_ct, cali_ct, 3,
#endif
BPF_MAP_TYPE_HASH,
struct calico_ct_key, struct calico_ct_value,
512000, BPF_F_NO_PREALLOC)
/* Classification of a packet with respect to the conntrack table. */
enum calico_ct_result_type {
	/*
	 * The packet does not belong to any known conntrack flow. TCP SYN
	 * packets are always reported as NEW so that they always go through
	 * policy.
	 */
	CALI_CT_NEW = 0,

	/*
	 * No conntrack entry matches, yet the packet is of a type that cannot
	 * be the start of a flow (for example, a TCP packet without SYN set).
	 */
	CALI_CT_MID_FLOW_MISS = 1,

	/*
	 * The packet belongs to a known flow that has been approved at "this"
	 * side, i.e. it is safe to let it through _this_ program. ESTABLISHED
	 * without ESTABLISHED_BYPASS means that only _this_ program has
	 * approved the flow and downstream programs still need to have their
	 * say. For example, a workload egress program implements egress
	 * policy for one workload; if that workload talks to another workload
	 * on the same host, the packet must also be approved by the ingress
	 * policy program attached to the other workload.
	 */
	CALI_CT_ESTABLISHED = 2,

	/*
	 * The packet belongs to a known flow and *both* legs of the conntrack
	 * entry have been approved. It is therefore safe to set the bypass
	 * mark bit on the traffic so that any downstream BPF programs let the
	 * packet through automatically.
	 */
	CALI_CT_ESTABLISHED_BYPASS = 3,

	/*
	 * The packet is a response packet on a NATted flow and needs to be
	 * SNATted. The new src IP and port are returned in result.nat_ip and
	 * result.nat_port.
	 */
	CALI_CT_ESTABLISHED_SNAT = 4,

	/*
	 * The packet is a request packet on a NATted flow and needs to be
	 * DNATted. The new dst IP and port are returned in result.nat_ip and
	 * result.nat_port.
	 */
	CALI_CT_ESTABLISHED_DNAT = 5,

	/*
	 * Returned for packets that cannot be parsed (e.g. an invalid ICMP
	 * response) and for packets whose conntrack entry is only approved by
	 * the other leg (indicating that policy on this leg failed to allow
	 * the packet).
	 */
	CALI_CT_INVALID = 6,
};
/*
 * A conntrack result value packs two things into one integer: the low
 * 8 bits hold the result code and the bits above hold CT_RES_* flags.
 */

/* Flag bits (all above the low byte). */
#define CT_RES_RELATED			0x100
#define CT_RES_RPF_FAILED		0x200
#define CT_RES_TUN_SRC_CHANGED		0x400
#define CT_RES_RESERVED_800		0x800
#define CT_RES_SYN			0x1000
#define CT_RES_CONFIRMED		0x2000

/* Split a result value into its code (low byte) and its flag bits. */
#define ct_result_rc(rc)			((rc) & 0xff)
#define ct_result_flags(rc)			((rc) & ~0xff)

/* Replace the code of val with rc, keeping the flag bits of val. */
#define ct_result_set_rc(val, rc)		((val) = ct_result_flags(val) | (rc))

/* Set / clear flag bits in val (val is modified in place). */
#define ct_result_set_flag(val, flags)		((val) |= (flags))
#define ct_result_clear_flag(val, flags)	((val) &= ~(flags))

/* Flag tests: each evaluates to non-zero when the flag is set in rc. */
#define ct_result_is_related(rc)		((rc) & CT_RES_RELATED)
#define ct_result_rpf_failed(rc)		((rc) & CT_RES_RPF_FAILED)
#define ct_result_tun_src_changed(rc)		((rc) & CT_RES_TUN_SRC_CHANGED)
#define ct_result_is_syn(rc)			((rc) & CT_RES_SYN)
#define ct_result_is_confirmed(rc)		((rc) & CT_RES_CONFIRMED)
// Result of a conntrack operation, including any NAT information.
// NOTE(review): presumably rc is the value the ct_result_*() macros operate
// on (result code in the low byte, CT_RES_* flags above it) -- confirm.
struct calico_ct_result {
__s16 rc;
__u16 flags;
ipv46_addr_t nat_ip; // new IP for SNAT/DNAT results (see CALI_CT_ESTABLISHED_SNAT/_DNAT)
ipv46_addr_t nat_sip;
__u16 nat_port; // new port for SNAT/DNAT results (see CALI_CT_ESTABLISHED_SNAT/_DNAT)
__u16 nat_sport;
ipv46_addr_t tun_ip;
__u32 ifindex_fwd; /* if set, the ifindex where the packet should be forwarded */
__u32 ifindex_created; /* For a CT state that was created by a packet ingressing
* through an interface towards the host, this is the
* ingress interface index. For a CT state created by a
* packet _from_ the host, it's CT_INVALID_IFINDEX (0).
*/
};
#endif /* __CALI_CONNTRACK_TYPES_H__ */