Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/flow_risks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ TLS needs to carry the the `SNI <https://en.wikipedia.org/wiki/Server_Name_Indic

NDPI_HTTP_SUSPICIOUS_CONTENT
============================
HTTP only: risk reported when HTTP carries content in expected format. Example the HTTP header indicates that the context is text/html but the real content is not readeable (i.e. it can transport binary data). In general this is an attempt to use a valid MIME type to carry data that does not match the type.
HTTP only: risk reported when HTTP carries content that is not in the expected format. For example, the HTTP header indicates that the content is text/html but the real content is not readable (i.e. it may be transporting binary data). In general this is an attempt to use a valid MIME type to carry data that does not match the type.

.. _Risk 026:

Expand Down
14 changes: 7 additions & 7 deletions example/ndpiReader.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ static int num_cfgs = 0;
int reader_log_level = 0;
char *_disabled_protocols = NULL;
static u_int8_t stats_flag = 0;
u_int8_t human_readeable_string_len = 5;
u_int8_t human_readable_string_len = 5;
u_int8_t max_num_udp_dissected_pkts = 24 /* 8 is enough for most protocols, Signal and SnapchatCall require more */, max_num_tcp_dissected_pkts = 80 /* due to telnet */;
static u_int32_t pcap_analysis_duration = (u_int32_t)-1;
static u_int32_t risk_stats[NDPI_MAX_RISK] = { 0 }, risks_found = 0, flows_with_risks = 0;
Expand Down Expand Up @@ -668,7 +668,7 @@ static void help(u_int long_help) {
" | 2 - List known risks\n"
" -d | Disable protocol guess (by ip and by port) and use only DPI.\n"
" | It is a shortcut to --cfg=dpi.guess_on_giveup,0\n"
" -e <len> | Min human readeable string match len. Default %u\n"
" -e <len> | Min human readable string match len. Default %u\n"
" -q | Quiet mode\n"
" -F | Enable flow stats\n"
" -t | Dissect GTP/TZSP tunnels\n"
Expand Down Expand Up @@ -718,7 +718,7 @@ static void help(u_int long_help) {
" --cfg=proto,param,value | Configure the specific attribute of this protocol\n"
" --dump-fpc-stats | Print FPC statistics\n"
,
human_readeable_string_len,
human_readable_string_len,
min_pattern_len, max_pattern_len, max_num_packets_per_flow, max_packet_payload_dissection,
max_num_reported_top_payloads, max_num_tcp_dissected_pkts, max_num_udp_dissected_pkts);

Expand Down Expand Up @@ -1132,7 +1132,7 @@ static void parse_parameters(int argc, char **argv)
break;

case 'e':
human_readeable_string_len = atoi(optarg);
human_readable_string_len = atoi(optarg);
break;

case 'E':
Expand Down Expand Up @@ -2233,8 +2233,8 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa
if(flow->dhcp_class_ident) fprintf(out, "[DHCP Class Ident: %s]",
flow->dhcp_class_ident);

if(flow->has_human_readeable_strings) fprintf(out, "[PLAIN TEXT (%s)]",
flow->human_readeable_string_buffer);
if(flow->has_human_readable_strings) fprintf(out, "[PLAIN TEXT (%s)]",
flow->human_readable_string_buffer);

#ifdef DIRECTION_BINS
print_bin(out, "Plen c2s", &flow->payload_len_bin_src2dst);
Expand Down Expand Up @@ -2962,7 +2962,7 @@ static void dump_realtime_protocol(struct ndpi_workflow * workflow, struct ndpi_
fprintf(out, "Detected Realtime protocol %s --> [%s] %s:%d <--> %s:%d app=%s <%s>\n",
date, ndpi_get_ip_proto_name(flow->protocol, ip_proto, sizeof(ip_proto)),
srcip, ntohs(flow->src_port), dstip, ntohs(flow->dst_port),
app_name, flow->human_readeable_string_buffer);
app_name, flow->human_readable_string_buffer);
}
}

Expand Down
193 changes: 176 additions & 17 deletions example/reader_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
#include "ndpi_classify.h"

extern u_int8_t enable_flow_stats, enable_payload_analyzer;
extern u_int8_t verbose, human_readeable_string_len;
extern u_int8_t verbose, human_readable_string_len;
extern u_int8_t max_num_udp_dissected_pkts /* 24 */, max_num_tcp_dissected_pkts /* 80 */;
static u_int32_t flow_id = 0;
extern FILE *fingerprint_fp;
Expand Down Expand Up @@ -1754,6 +1754,138 @@ void update_tcp_flags_count(struct ndpi_flow_info* flow, struct ndpi_tcphdr* tcp

/* ****************************************************** */

#define MIN_ENTROPY 2.5
#define MAX_ENTROPY 4.8
/**
* @brief Calculates the Shannon entropy of a given string to estimate its randomness.
*
* This function computes the Shannon entropy of the input string by analyzing the
* frequency distribution of its characters. Higher entropy indicates more randomness,
* while lower entropy suggests more predictable or structured content.
*
* The function is useful for detecting potentially random or pseudo-random strings,
* such as passwords, tokens, or hashes.
*
* @param str A null-terminated input string to analyze.
* @return The calculated entropy as a double. Returns 0.0 for empty strings.
*
* @note Typical entropy values range between MIN_ENTROPY (2.5) and MAX_ENTROPY (4.8)
* for short strings. These thresholds can be used to classify the randomness
* level of the string.
*/
static double calculate_entropy(const char *str) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to re-implement, already present in the library; see ndpi_entropy()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

int freq[256] = {0};
int len = strlen(str);
if (len == 0) return 0.0;

for (int i = 0; i < len; i++)
freq[(unsigned char)str[i]]++;

double entropy = 0.0;
for (int i = 0; i < 256; i++) {
if (freq[i] > 0) {
double p = (double)freq[i] / len;
entropy -= p * log2(p);
}
}
return entropy;
}

/**
 * @brief Tells whether a character may appear in a "readable" string.
 *
 * Checks are applied in this order:
 *  - the NUL character is never valid;
 *  - ':', '.', ' ', '@' and '/' are always valid;
 *  - any other character reported as punctuation by ndpi_ispunct() is
 *    rejected;
 *  - what remains is valid only if ndpi_isdigit() or ndpi_isalpha()
 *    accepts it.
 *
 * @param c The character to validate.
 * @return 1 if the character is considered valid, 0 otherwise.
 */
static int readable_string_is_valid_char(char c) {
  const char allowed[] = ":. @/";

  /*
   * strchr() treats the terminating NUL of `allowed` as part of the string,
   * so without this guard '\0' would be reported as an allowed character.
   */
  if (c == '\0')
    return 0;

  if (strchr(allowed, c) != NULL)
    return 1;

  if (ndpi_ispunct(c))
    return 0;

  return (ndpi_isdigit(c) || ndpi_isalpha(c)) ? 1 : 0;
}

/**
 * @brief Heuristically decides whether a string looks human-readable.
 *
 * The string is accepted only if ALL of the following hold:
 *  - it is not NULL and its length is between 3 and 1024 characters;
 *  - every character is accepted by readable_string_is_valid_char();
 *  - it contains at least 2 alphabetic characters;
 *  - the number of non-alphabetic characters (digits, spaces and symbols
 *    all count) does not exceed len / 2 (integer division);
 *  - it has no run of 3 or more consecutive characters that are neither
 *    alphanumeric nor whitespace;
 *  - its entropy, as computed by calculate_entropy(), lies within
 *    [MIN_ENTROPY, MAX_ENTROPY].
 *
 * @note The string must be NUL-terminated.
 *
 * @param str Pointer to the NUL-terminated string to validate.
 * @return true if the string is considered human-readable, false otherwise.
 */
static bool ndpi_filter_readable_string(char *str) {
  if (!str)
    return false;

  const size_t len = strlen(str);
  if (len < 3 || len > 1024)
    return false;

  size_t letters = 0, non_letters = 0;
  unsigned int symbol_run = 0;

  for (size_t i = 0; i < len; i++) {
    /* <ctype.h> classifiers are only defined for unsigned char values (or EOF) */
    const unsigned char ch = (unsigned char)str[i];

    if (!readable_string_is_valid_char(str[i]))
      return false;

    if (isalpha(ch))
      letters++;
    else
      non_letters++;

    /* Track runs of characters that are neither alphanumeric nor whitespace */
    if (!isalnum(ch) && !isspace(ch)) {
      symbol_run++;
      if (symbol_run >= 3)
        return false;
    } else {
      symbol_run = 0;
    }
  }

  /* Reject if too few alphabetic characters */
  if (letters < 2)
    return false;

  /* Reject if more than half of the characters are non-alphabetic */
  if (non_letters > len / 2)
    return false;

  /* Reject if entropy is too low (monotonous) or too high (random noise) */
  const double entropy = calculate_entropy(str);
  if (entropy < MIN_ENTROPY || entropy > MAX_ENTROPY)
    return false;

  return true;
}

/* ****************************************************** */

/**
Function to process the packet:
determine the flow of a packet and try to decode it
Expand Down Expand Up @@ -1935,28 +2067,55 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow,
memset(&flow->flow_last_pkt_time, '\0', sizeof(flow->flow_last_pkt_time));
}

if((human_readeable_string_len != 0) && (!flow->has_human_readeable_strings)) {
if ((human_readable_string_len != 0) && (!flow->has_human_readable_strings)) {
u_int8_t skip = 0;

if(proto == IPPROTO_TCP &&
(is_ndpi_proto(flow, NDPI_PROTOCOL_TLS) ||
is_ndpi_proto(flow, NDPI_PROTOCOL_SSH))) {
if((flow->src2dst_packets+flow->dst2src_packets) < 10 /* MIN_NUM_ENCRYPT_SKIP_PACKETS */)
skip = 1; /* Skip initial negotiation packets */
if (proto == IPPROTO_TCP && (is_ndpi_proto(flow, NDPI_PROTOCOL_TLS) ||
is_ndpi_proto(flow, NDPI_PROTOCOL_SSH))) {
if ((flow->src2dst_packets+flow->dst2src_packets) <
10 /* MIN_NUM_ENCRYPT_SKIP_PACKETS */)
skip = 1; /* Skip initial negotiation packets */
}

if((!skip) && ((flow->src2dst_packets+flow->dst2src_packets) < 100)) {
if(ndpi_has_human_readeable_string((char*)packet, header->caplen,
human_readeable_string_len,
flow->human_readeable_string_buffer,
sizeof(flow->human_readeable_string_buffer)) == 1)
flow->has_human_readeable_strings = 1;
if ((!skip) && ((flow->src2dst_packets+flow->dst2src_packets) < 100)) {
ndpi_string_list_t* human_readable_string_list =
ndpi_extract_readable_strings(
(const unsigned char*)packet,
header->caplen,
human_readable_string_len,
2,
ndpi_filter_readable_string
);
if (human_readable_string_list) {
if (human_readable_string_list->count > 0 && human_readable_string_list->items[0]) {
if (human_readable_string_list->count > 1 &&
human_readable_string_list->items[1] &&
strcmp(human_readable_string_list->items[0],
human_readable_string_list->items[1]) != 0) {
snprintf(flow->human_readable_string_buffer,
sizeof(flow->human_readable_string_buffer),
"%s %s",
human_readable_string_list->items[0],
human_readable_string_list->items[1]);
} else {
snprintf(flow->human_readable_string_buffer,
sizeof(flow->human_readable_string_buffer),
"%s",
human_readable_string_list->items[0]);
}

flow->has_human_readable_strings = 1;
}

ndpi_string_list_free(human_readable_string_list);
}
}
} else {
if(proto == IPPROTO_TCP &&
(is_ndpi_proto(flow, NDPI_PROTOCOL_TLS) ||
is_ndpi_proto(flow, NDPI_PROTOCOL_SSH)))
flow->has_human_readeable_strings = 0;
if (proto == IPPROTO_TCP &&
(is_ndpi_proto(flow, NDPI_PROTOCOL_TLS) ||
is_ndpi_proto(flow, NDPI_PROTOCOL_SSH))){
flow->has_human_readable_strings = 0;
}
}
} else { // flow is NULL
workflow->stats.total_discarded_bytes += header->len;
Expand Down
4 changes: 2 additions & 2 deletions example/reader_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ typedef struct ndpi_flow_info {
u_int64_t src2dst_bytes, dst2src_bytes;
u_int64_t src2dst_goodput_bytes, dst2src_goodput_bytes;
u_int32_t src2dst_packets, dst2src_packets;
u_int32_t has_human_readeable_strings;
char human_readeable_string_buffer[32];
u_int32_t has_human_readable_strings;
char human_readable_string_buffer[128];
char *risk_str;

// result only, not used for flow identification
Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_ndpi_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ struct ndpi_global_context *g_ctx;

u_int8_t enable_payload_analyzer = 0;
u_int8_t enable_flow_stats = 1;
u_int8_t human_readeable_string_len = 5;
u_int8_t human_readable_string_len = 5;
u_int8_t max_num_udp_dissected_pkts = 0, max_num_tcp_dissected_pkts = 0; /* Disable limits at application layer */;
int malloc_size_stats = 0;
FILE *fingerprint_fp = NULL;
Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_readerutils_parseprotolist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

u_int8_t enable_payload_analyzer = 0;
u_int8_t enable_flow_stats = 0;
u_int8_t human_readeable_string_len = 5;
u_int8_t human_readable_string_len = 5;
u_int8_t max_num_udp_dissected_pkts = 16 /* 8 is enough for most protocols, Signal requires more */, max_num_tcp_dissected_pkts = 80 /* due to telnet */;
int malloc_size_stats = 0;
FILE *fingerprint_fp = NULL;
Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_readerutils_workflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ extern u_int8_t enable_doh_dot_detection;

u_int8_t enable_payload_analyzer = 0;
u_int8_t enable_flow_stats = 0;
u_int8_t human_readeable_string_len = 5;
u_int8_t human_readable_string_len = 5;
u_int8_t max_num_udp_dissected_pkts = 16 /* 8 is enough for most protocols, Signal requires more */, max_num_tcp_dissected_pkts = 80 /* due to telnet */;
int malloc_size_stats = 0;
FILE *fingerprint_fp = NULL;
Expand Down
22 changes: 19 additions & 3 deletions src/include/ndpi_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1169,9 +1169,25 @@ extern "C" {
const char* ndpi_tunnel2str(ndpi_packet_tunnel tt);
u_int16_t ndpi_guess_host_protocol_id(struct ndpi_detection_module_struct *ndpi_struct,
struct ndpi_flow_struct *flow);
int ndpi_has_human_readeable_string(char *buffer, u_int buffer_size,
u_int8_t min_string_match_len, /* Will return 0 if no string > min_string_match_len have been found */
char *outbuf, u_int outbuf_len);

/**
* @struct ndpi_string_list_t
* @brief Represents a dynamic list of strings.
*
* This structure manages a dynamic array of strings, keeping track of the number of
* stored items and the allocated capacity.
*
* NOTE(review): a list is presumably released with ndpi_string_list_free(),
* declared right after this type; confirm the ownership of the individual
* strings against the implementation.
*/
typedef struct {
char **items; ///< Array of string pointers; presumably only the first `count` entries are meaningful (confirm)
size_t count; ///< Number of strings currently stored
size_t capacity; ///< Allocated capacity of the list (presumably the number of slots in `items`; confirm)
} ndpi_string_list_t;

void ndpi_string_list_free(ndpi_string_list_t *list);

ndpi_string_list_t* ndpi_extract_readable_strings(const unsigned char *buffer, size_t buffer_len,
size_t min_len, size_t list_limit, bool (*filter_func)(char *));

/* Return a flow info string (summarized). Does only work for DNS/HTTP/TLS/QUIC. */
const char* ndpi_get_flow_info(struct ndpi_flow_struct const * const flow,
ndpi_protocol const * const l7_protocol);
Expand Down
Loading
Loading