This repository has been archived by the owner on Feb 29, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsd_score.h
108 lines (90 loc) · 3.79 KB
/
sd_score.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Copyright (C) 2022 Valentin-Ioan VINTILA (313CA / 2021-2022).
// All rights reserved.
#ifndef SD_SCORE_H
#define SD_SCORE_H
// This header declares the individual metrics used to score a given email as
// spam or ham, together with the thresholds used to interpret the result.
// General header file
#include "spam_detector.h"
// Suspicion levels, listed in increasing order of severity.
// NOTE(review): the original notes only say "after this value"; presumably an
// email's score is compared against these -- confirm the exact comparison
// (> or >=) in the implementation.
// Above this value, an email is a little suspicious.
#define SPAM_LOW_SUSPICION 35.0
// Above this value, an email is really suspicious.
#define SPAM_HIGH_SUSPICION 42.0
// Above this value, an email is considered spam.
#define SPAM_THRESHOLD 50.0
// This function checks whether a given email is a response to another email.
// - - - - -
// email_t *email = The email to be checked.
// - - - - -
// Return: bool = true, if the email is a response; false otherwise.
extern bool is_response(email_t *email);
// This function checks whether enough "> " substrings are found in the email.
// If so, the email might have been a forward and could indeed be ham. This
// metric is similar to the response one (see is_response).
// NOTE(review): how many occurrences count as "enough" is decided by the
// implementation and is not visible from this header.
// - - - - -
// email_t *email = The email to be checked.
// - - - - -
// Return: bool = true, if the email might have been a forward.
extern bool is_great(email_t *email);
// This function scores an email based on the amount of uppercase letters
// present in its original content.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// - - - - -
// Return: nothing; the result is delivered through score.
extern void score_uppercase(double *score, email_t *email);
// This function scores an email based on its header.
// Note: This function is currently DISABLED!
// NOTE(review): presumably "disabled" means it is not called or has no effect
// on the score -- confirm in the implementation before relying on it.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// - - - - -
// Return: nothing; the result is delivered through score.
extern void score_header(double *score, email_t *email);
// This function scores an email based on the amount of consecutive consonants.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// - - - - -
// Return: nothing; the result is delivered through score.
extern void score_consonants(double *score, email_t *email);
// This function scores an email based on the punctuation it contains.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// - - - - -
// Return: nothing; the result is delivered through score.
extern void score_punctuation(double *score, email_t *email);
// This function scores an email based on the 'data/spammers' file.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// - - - - -
// Return: error_t = Could be CRITICAL_INPUT_SP if something went wrong while
//                   trying to read from data/spammers.
extern error_t score_known_spammers(double *score, email_t *email);
// This function scores the first 100 characters of the email. If these are
// "spammy", the whole email has a much higher chance of also being spam.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// kw_collection_t *kw_a_kw = The spam words collection.
// double *k_score = The original score that will be updated.
// - - - - -
// Return: nothing; results are delivered through score and k_score.
extern void score_first_100(double *score, email_t *email,
kw_collection_t *kw_a_kw, double *k_score);
// This function scores how many connections can be found in a given email.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// kw_collection_t *kw_connectivity = The keyword collection used by this
//                                    metric (presumably the connection words
//                                    to look for -- TODO confirm).
// - - - - -
// Return: nothing; the result is delivered through score.
extern void score_connectivity(double *score, email_t *email,
kw_collection_t *kw_connectivity);
// This function scores an email in general.
// NOTE(review): presumably it combines the individual score_* metrics declared
// in this header -- confirm in the implementation.
// - - - - -
// double *score = The end result.
// email_t *email = The email to be scored.
// kw_collection_t *kw_a_kw = Keyword collection (presumably the spam words
//                            -- TODO confirm).
// kw_collection_t *kw_connectivity = Keyword collection (presumably the one
//                                    used for the connectivity metric
//                                    -- TODO confirm).
// kw_collection_t *kw_news = Keyword collection (presumably news-related
//                            words -- TODO confirm).
// kw_collection_t *kw_positive_kw = Keyword collection (presumably words that
//                                   indicate ham -- TODO confirm).
// - - - - -
// Return: error_t = In case something wrong happens.
extern error_t score_all(double *score, email_t *email,
kw_collection_t *kw_a_kw,
kw_collection_t *kw_connectivity,
kw_collection_t *kw_news,
kw_collection_t *kw_positive_kw);
// This function takes into consideration duplicate spam emails.
// NOTE(review): presumably hashes and scores are parallel arrays holding one
// entry per email (n entries each) -- confirm against the caller.
// - - - - -
// int n = The number of emails.
// hash_t *hashes = The hashes for said emails.
// double *scores = The scores that will be updated.
// - - - - -
// Return: nothing; the scores are updated in place.
extern void post_score(int n, hash_t *hashes, double *scores);
#endif // SD_SCORE_H