-
Notifications
You must be signed in to change notification settings - Fork 390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the deadlock between the exporter and the conntrack polling goroutines #2429
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -301,15 +301,10 @@ func (exp *flowExporter) sendFlowRecords() error { | |
exp.numDataSetsSent = exp.numDataSetsSent + 1 | ||
|
||
if flowexporter.IsConnectionDying(&record.Conn) { | ||
// If the connection is in dying state or connection is not in conntrack table, | ||
// we will delete the flow records from records map. | ||
klog.V(2).Infof("Deleting the inactive flow records with key: %v from record map", key) | ||
if err := exp.flowRecords.DeleteFlowRecordWithoutLock(key); err != nil { | ||
return err | ||
} | ||
if err := exp.conntrackConnStore.SetExportDone(key); err != nil { | ||
return err | ||
} | ||
// If the connection is in dying state or connection is not in conntrack | ||
// table, we set the DyingAndDoneExport flag to do the deletion later. | ||
record.DyingAndDoneExport = true | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As a future improvement, maybe we should just change … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed. Or I was thinking of storing a pointer to the record in the record map instead of the value. |
||
exp.flowRecords.AddFlowRecordWithoutLock(&key, &record) | ||
} else { | ||
exp.flowRecords.ValidateAndUpdateStats(key, record) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,6 @@ | |
package flowrecords | ||
|
||
import ( | ||
"fmt" | ||
"sync" | ||
"time" | ||
|
||
|
@@ -37,11 +36,12 @@ func NewFlowRecords() *FlowRecords { | |
|
||
// AddOrUpdateFlowRecord adds or updates the flow record in the record map given the connection. | ||
// It makes a copy of the connection object to record, to avoid race conditions between the | ||
// connection store and the flow exporter. | ||
// connection store and the flow exporter. We expect caller to hold the lock for | ||
// the connection store. | ||
func (fr *FlowRecords) AddOrUpdateFlowRecord(key flowexporter.ConnectionKey, conn *flowexporter.Connection) error { | ||
// If the connection is in dying state and the corresponding flow records are already | ||
// exported, then there is no need to add or update the record. | ||
if flowexporter.IsConnectionDying(conn) && conn.DoneExport { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Changed the name of the flag. Removed this extra check here and other places as well. |
||
// If the connection is in dying state and is already exported, then there is | ||
// no need to add or update the record. | ||
if conn.DyingAndDoneExport { | ||
return nil | ||
} | ||
|
||
|
@@ -63,8 +63,19 @@ func (fr *FlowRecords) AddOrUpdateFlowRecord(key flowexporter.ConnectionKey, con | |
IsIPv6: isIPv6, | ||
LastExportTime: conn.StartTime, | ||
IsActive: true, | ||
DyingAndDoneExport: false, | ||
} | ||
} else { | ||
// If the connection is in dying state and the corresponding flow records are already | ||
// exported, then update the DyingAndDoneExport flag on the connection. | ||
if record.DyingAndDoneExport { | ||
// It is safe to update the connection as we hold the connection map | ||
// lock when calling this function. | ||
conn.DyingAndDoneExport = true | ||
delete(fr.recordsMap, key) | ||
klog.V(2).InfoS("Deleting the inactive flow records in record map", "FlowKey", key) | ||
return nil | ||
} | ||
// set IsActive flag to true when there are changes either in stats or TCP state | ||
if (conn.OriginalPackets > record.PrevPackets) || (conn.ReversePackets > record.PrevReversePackets) || record.Conn.TCPState != conn.TCPState { | ||
record.IsActive = true | ||
|
@@ -83,6 +94,12 @@ func (fr *FlowRecords) AddFlowRecordToMap(connKey *flowexporter.ConnectionKey, r | |
fr.recordsMap[*connKey] = *record | ||
} | ||
|
||
// AddFlowRecordWithoutLock adds the flow record to the record map given connection key. | ||
// Caller is expected to hold the lock for the record map. | ||
func (fr *FlowRecords) AddFlowRecordWithoutLock(connKey *flowexporter.ConnectionKey, record *flowexporter.FlowRecord) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe you can address naming consistency with the function above (… There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this can be done. Hope you are OK with the explanation in the other comment. |
||
fr.recordsMap[*connKey] = *record | ||
} | ||
|
||
// GetFlowRecordFromMap gets the flow record from record map given connection key. | ||
// This is used only for unit tests. | ||
func (fr *FlowRecords) GetFlowRecordFromMap(connKey *flowexporter.ConnectionKey) (*flowexporter.FlowRecord, bool) { | ||
|
@@ -92,17 +109,6 @@ func (fr *FlowRecords) GetFlowRecordFromMap(connKey *flowexporter.ConnectionKey) | |
return &record, exists | ||
} | ||
|
||
// DeleteFlowRecordWithoutLock deletes the record from the record map given | ||
// the connection key without grabbing the lock. Caller is expected to grab lock. | ||
func (fr *FlowRecords) DeleteFlowRecordWithoutLock(connKey flowexporter.ConnectionKey) error { | ||
_, exists := fr.recordsMap[connKey] | ||
if !exists { | ||
return fmt.Errorf("flow record with key %v doesn't exist in map", connKey) | ||
} | ||
delete(fr.recordsMap, connKey) | ||
return nil | ||
} | ||
|
||
// ValidateAndUpdateStats validates and updates the flow record given the connection | ||
// key. Caller is expected to grab lock. | ||
func (fr *FlowRecords) ValidateAndUpdateStats(connKey flowexporter.ConnectionKey, record flowexporter.FlowRecord) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this function
DeleteFlowRecordWithoutLock(key)
can be removed, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. Done.