5151#include "misc/json.h"
5252#include "misc/language.h"
5353#include "misc/bstr.h"
54+ #include "misc/string_similarity.h"
5455
5556#include "audio/out/ao.h"
5657#include "filters/f_decoder_wrapper.h"
@@ -942,86 +943,6 @@ int mp_add_external_file(struct MPContext *mpctx, char *filename,
942943 return -1 ;
943944}
944945
945- // Helpers for sub-auto=closest selection
// Report whether a filename token looks like a trailing tag (language code,
// "subs", "forced", ...) rather than part of the title.
//
// A token qualifies if it is made up solely of alphabetic characters and is
// exactly 2 or 3 characters long, or if it exactly matches (case-sensitively)
// one of the known tags listed below.
static bool is_suffix_token(const char *tkn)
{
    static const char *const known_tags[] = {
        "eng", "en", "es", "fr", "de", "pt", "ru", "jp", "ja", "zh",
        "chs", "cht", "sub", "subs", "sdh", "forced", "cc",
    };

    size_t n = strlen(tkn);
    if (n == 2 || n == 3) {
        // Walk to the first non-alphabetic byte; reaching the terminator
        // means the whole token is alphabetic.
        const char *c = tkn;
        while (*c && isalpha((unsigned char)*c))
            c++;
        if (!*c)
            return true;
    }

    for (size_t k = 0; k < sizeof(known_tags) / sizeof(known_tags[0]); k++) {
        if (!strcmp(tkn, known_tags[k]))
            return true;
    }
    return false;
}
960-
961- static char * normalize_base_name (void * ta_ctx , const char * path )
962- {
963- struct bstr base = bstr0 (mp_basename (path ));
964- base = bstr_strip_ext (base );
965- char * tmpbuf = talloc_strndup (ta_ctx , base .start , base .len );
966- for (int i = 0 ; tmpbuf [i ]; i ++ )
967- tmpbuf [i ] = tolower ((unsigned char )tmpbuf [i ]);
968- char * * tokens = NULL ;
969- int ntok = 0 ;
970- char * p = tmpbuf ;
971- while (* p ) {
972- while (* p && !isalnum ((unsigned char )* p )) p ++ ;
973- if (!* p ) break ;
974- char * start = p ;
975- while (* p && isalnum ((unsigned char )* p )) p ++ ;
976- char save = * p ; * p = '\0' ;
977- MP_TARRAY_APPEND (ta_ctx , tokens , ntok , talloc_strdup (ta_ctx , start ));
978- * p = save ;
979- }
980- while (ntok > 0 && is_suffix_token (tokens [ntok - 1 ]))
981- ntok -- ;
982- char * out = talloc_strdup (ta_ctx , "" );
983- for (int i = 0 ; i < ntok ; i ++ )
984- out = talloc_asprintf_append_buffer (out , "%s" , tokens [i ]);
985- if (!out [0 ])
986- out = talloc_strdup (ta_ctx , tmpbuf );
987- return out ;
988- }
989-
990- static int levenshtein_dist (const char * a , const char * b )
991- {
992- int la = (int )strlen (a ), lb = (int )strlen (b );
993- if (la == 0 ) return lb ;
994- if (lb == 0 ) return la ;
995- int * prev = talloc_array (NULL , int , lb + 1 );
996- int * curr = talloc_array (NULL , int , lb + 1 );
997- for (int j = 0 ; j <= lb ; j ++ ) prev [j ] = j ;
998- for (int i = 1 ; i <= la ; i ++ ) {
999- curr [0 ] = i ;
1000- for (int j = 1 ; j <= lb ; j ++ ) {
1001- int cost = a [i - 1 ] == b [j - 1 ] ? 0 : 1 ;
1002- int del = prev [j ] + 1 ;
1003- int ins = curr [j - 1 ] + 1 ;
1004- int sub = prev [j - 1 ] + cost ;
1005- int m = del < ins ? del : ins ;
1006- curr [j ] = m < sub ? m : sub ;
1007- }
1008- int * tmpv = prev ; prev = curr ; curr = tmpv ;
1009- }
1010- int d = prev [lb ];
1011- talloc_free (prev );
1012- talloc_free (curr );
1013- return d ;
1014- }
1015-
// Map the edit distance between a and b to a similarity score:
// 1 - distance / max(strlen(a), strlen(b)).
// Two empty strings are treated as identical and score 1.0.
static double similarity_ratio(const char *a, const char *b)
{
    int len_a = (int)strlen(a);
    int len_b = (int)strlen(b);
    int longest = len_a < len_b ? len_b : len_a;
    if (longest == 0)
        return 1.0;
    return 1.0 - (double)levenshtein_dist(a, b) / (double)longest;
}
1024-
1025946// Returns true if a season/episode could be parsed.
1026947// Recognizes common patterns like S01E02, s1e2, or 1x02 (case-insensitive).
1027948static bool parse_season_episode (const char * path , int * out_season , int * out_episode )
@@ -1141,7 +1062,7 @@ void autoload_external_files(struct MPContext *mpctx, struct mp_cancel *cancel)
11411062 int best_sub_index = -1 ;
11421063 if (opts -> sub_auto == 3 ) {
11431064 void * selctx = talloc_new (tmp );
1144- char * movie_norm = normalize_base_name (selctx , mpctx -> filename );
1065+ char * movie_norm = mp_normalize_base_name (selctx , mpctx -> filename );
11451066 int mv_season = -1 , mv_episode = -1 ;
11461067 bool mv_has_se = parse_season_episode (mpctx -> filename , & mv_season , & mv_episode );
11471068 double best_score = -1.0 ;
@@ -1162,8 +1083,8 @@ void autoload_external_files(struct MPContext *mpctx, struct mp_cancel *cancel)
11621083 continue ;
11631084 if (!sc [STREAM_VIDEO ] && !sc [STREAM_AUDIO ])
11641085 continue ;
1165- char * cand_norm = normalize_base_name (selctx , e -> fname );
1166- double score = similarity_ratio (movie_norm , cand_norm );
1086+ char * cand_norm = mp_normalize_base_name (selctx , e -> fname );
1087+ double score = mp_similarity_ratio (movie_norm , cand_norm );
11671088 int cand_season = -1 , cand_episode = -1 ;
11681089 int match_se = 0 ;
11691090 if (mv_has_se && parse_season_episode (e -> fname , & cand_season , & cand_episode )) {
0 commit comments