Skip to content

Commit

Permalink
mod_spam_filter: resolve http redirects so we're not fooled by url sh…
Browse files Browse the repository at this point in the history
…orteners
  • Loading branch information
sstrigler committed Jan 8, 2025
1 parent 9e747bf commit d040e73
Showing 1 changed file with 39 additions and 4 deletions.
43 changes: 39 additions & 4 deletions mod_spam_filter/src/mod_spam_filter.erl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
-include_lib("xmpp/include/xmpp.hrl").

-define(COMMAND_TIMEOUT, timer:seconds(30)).
-define(HTTPC_TIMEOUT, timer:seconds(3)).

-type url() :: binary().
-type filename() :: binary() | none.
Expand Down Expand Up @@ -197,6 +198,9 @@ handle_call({check_body, URLs, JIDs, From}, _From,
Result2
end,
{reply, {spam_filter, Result}, State2};
handle_call({resolve_redirects, URLs}, _From, State) ->
ResolvedURLs = do_resolve_redirects(URLs, []),
{reply, {spam_filter, ResolvedURLs}, State};
handle_call({reload_files, JIDsFile, URLsFile}, _From, State) ->
{Result, State1} = reload_files(JIDsFile, URLsFile, State),
{reply, {spam_filter, Result}, State1};
Expand Down Expand Up @@ -390,7 +394,7 @@ check_from(Host, From) ->

-spec check_body(binary(), jid(), binary()) -> ham | spam.
check_body(Host, From, Body) ->
case {extract_urls(Body), extract_jids(Body)} of
case {extract_urls(Host, Body), extract_jids(Body)} of
{none, none} ->
?DEBUG("No JIDs/URLs found in message", []),
ham;
Expand All @@ -406,17 +410,48 @@ check_body(Host, From, Body) ->
end
end.

-spec extract_urls(binary()) -> {urls, [url()]} | none.
extract_urls(Body) ->
-spec extract_urls(binary(), binary()) -> {urls, [url()]} | none.
extract_urls(Host, Body) ->
RE = <<"https?://\\S+">>,
Options = [global, {capture, all, binary}],
case re:run(Body, RE, Options) of
{match, Captured} when is_list(Captured) ->
{urls, lists:flatten(Captured)};
Urls = resolve_redirects(Host, lists:flatten(Captured)),
{urls, Urls};
nomatch ->
none
end.

-spec resolve_redirects(binary(), [url()]) -> [url()].
resolve_redirects(Host, URLs) ->
Proc = get_proc_name(Host),
try gen_server:call(Proc, {resolve_redirects, URLs}) of
{spam_filter, ResolvedURLs} ->
ResolvedURLs
catch exit:{timeout, _} ->
?WARNING_MSG("Timeout while resolving redirect: ~p", [URLs]),
URLs
end.

-spec do_resolve_redirects([url()], [url()]) -> [url()].
do_resolve_redirects([], Result) -> Result;
do_resolve_redirects([URL | Rest], Acc) ->
case
httpc:request(get, {URL, [{"user-agent", "curl/8.7.1"}]},
[{autoredirect, false}, {timeout, ?HTTPC_TIMEOUT}], [])
of
{ok, {{_, Moved, _}, Headers, _Body}} when Moved >= 300, Moved < 400 ->
Location = proplists:get_value("location", Headers),
case lists:member(Location, Acc) of
false ->
do_resolve_redirects([Location | Rest], [URL | Acc]);
true ->
do_resolve_redirects(Rest, [URL | Acc])
end;
_Res ->
do_resolve_redirects(Rest, [URL | Acc])
end.

-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
extract_jids(Body) ->
RE = <<"\\S+@\\S+">>,
Expand Down

0 comments on commit d040e73

Please sign in to comment.