From 2aab4e03c3a2867abd4555dc776eebc8b0dba176 Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Tue, 1 Jan 2019 23:26:40 +0300 Subject: [PATCH 1/9] Add OGP parser --- lib/pleroma/web/rich_media/data.ex | 3 +++ lib/pleroma/web/rich_media/parser.ex | 14 +++++++++++ lib/pleroma/web/rich_media/parsers/ogp.ex | 30 +++++++++++++++++++++++ mix.exs | 3 ++- mix.lock | 2 ++ test/fixtures/rich_media/ogp.html | 9 +++++++ test/web/rich_media/parser_test.exs | 26 ++++++++++++++++++++ 7 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 lib/pleroma/web/rich_media/data.ex create mode 100644 lib/pleroma/web/rich_media/parser.ex create mode 100644 lib/pleroma/web/rich_media/parsers/ogp.ex create mode 100644 test/fixtures/rich_media/ogp.html create mode 100644 test/web/rich_media/parser_test.exs diff --git a/lib/pleroma/web/rich_media/data.ex b/lib/pleroma/web/rich_media/data.ex new file mode 100644 index 000000000..403d1d341 --- /dev/null +++ b/lib/pleroma/web/rich_media/data.ex @@ -0,0 +1,3 @@ +defmodule Pleroma.Web.RichMedia.Data do + defstruct [:title, :type, :image, :url, :description] +end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex new file mode 100644 index 000000000..d9c1684d5 --- /dev/null +++ b/lib/pleroma/web/rich_media/parser.ex @@ -0,0 +1,14 @@ +defmodule Pleroma.Web.RichMedia.Parser do + @parsers [Pleroma.Web.RichMedia.Parsers.OGP] + + def parse(url) do + {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) + + Enum.reduce_while(@parsers, %Pleroma.Web.RichMedia.Data{}, fn parser, acc -> + case parser.parse(html, acc) do + {:ok, data} -> {:halt, data} + {:error, _msg} -> {:cont, acc} + end + end) + end +end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex new file mode 100644 index 000000000..75084c7ee --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Web.RichMedia.Parsers.OGP do + def parse(html, data) do + with elements = [_ | _] <- get_elements(html), + ogp_data = + Enum.reduce(elements, data, fn el, acc -> + attributes = normalize_attributes(el) + + Map.merge(acc, attributes) + end) do + {:ok, ogp_data} + else + _e -> {:error, "No OGP metadata found"} + end + end + + defp get_elements(html) do + html |> Floki.find("meta[property^='og:']") + end + + defp normalize_attributes(tuple) do + {_tag, attributes, _children} = tuple + + data = + Enum.into(attributes, %{}, fn {name, value} -> + {name, String.trim_leading(value, "og:")} + end) + + %{String.to_atom(data["property"]) => data["content"]} + end +end diff --git a/mix.exs b/mix.exs index efdd4b7ed..0b8052d0a 100644 --- a/mix.exs +++ b/mix.exs @@ -75,7 +75,8 @@ defmodule Pleroma.Mixfile do {:web_push_encryption, "~> 0.2.1"}, {:swoosh, "~> 0.20"}, {:gen_smtp, "~> 0.13"}, - {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test} + {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test}, + {:floki, "~> 0.20.0"} ] end diff --git a/mix.lock b/mix.lock index 323f55bb9..d6836231d 100644 --- a/mix.lock +++ b/mix.lock @@ -20,9 +20,11 @@ "ex_aws_s3": {:hex, :ex_aws_s3, "2.0.1", "9e09366e77f25d3d88c5393824e613344631be8db0d1839faca49686e99b6704", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm"}, "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, "ex_machina": {:hex, :ex_machina, "2.2.0", "fec496331e04fc2db2a1a24fe317c12c0c4a50d2beb8ebb3531ed1f0d84be0ed", [:mix], [{:ecto, "~> 2.1", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"}, + "floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"}, "gen_smtp": {:hex, :gen_smtp, "0.13.0", "11f08504c4bdd831dc520b8f84a1dce5ce624474a797394e7aafd3c29f5dcd25", [:rebar3], [], "hexpm"}, "gettext": {:hex, :gettext, "0.15.0", "40a2b8ce33a80ced7727e36768499fc9286881c43ebafccae6bab731e2b2b8ce", [:mix], [], "hexpm"}, "hackney": {:hex, :hackney, "1.14.3", "b5f6f5dcc4f1fba340762738759209e21914516df6be440d85772542d4a5e412", [:rebar3], [{:certifi, "2.4.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, + "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"}, "html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"}, "httpoison": {:hex, :httpoison, "1.2.0", "2702ed3da5fd7a8130fc34b11965c8cfa21ade2f232c00b42d96d4967c39a3a3", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, diff --git a/test/fixtures/rich_media/ogp.html b/test/fixtures/rich_media/ogp.html new file mode 100644 index 000000000..c886b5871 --- /dev/null +++ b/test/fixtures/rich_media/ogp.html @@ -0,0 +1,9 @@ + + + The Rock (1996) + + + + + + diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs new file mode 100644 index 000000000..bb0d663e9 --- /dev/null +++ b/test/web/rich_media/parser_test.exs @@ -0,0 +1,26 @@ +defmodule Pleroma.Web.RichMedia.ParserTest do + use ExUnit.Case, async: true + + setup do + Tesla.Mock.mock(fn + %{ + method: :get, + url: "http://example.com/ogp" + } -> + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + end) + + :ok + end + + test "parses ogp" do + assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") == + %Pleroma.Web.RichMedia.Data{ + description: nil, + image: "http://ia.media-imdb.com/images/rock.jpg", + title: "The Rock", + type: "video.movie", + url: "http://www.imdb.com/title/tt0117500/" + } + end +end From 917d48d09bad573260bc816310ee2c75d4db84a8 Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Tue, 1 Jan 2019 23:29:47 +0300 Subject: [PATCH 2/9] Better variable name --- lib/pleroma/web/rich_media/parsers/ogp.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex index 75084c7ee..5773a5263 100644 --- a/lib/pleroma/web/rich_media/parsers/ogp.ex +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -17,8 +17,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.OGP do html |> Floki.find("meta[property^='og:']") end - defp normalize_attributes(tuple) do - {_tag, attributes, _children} = tuple + defp normalize_attributes(html_node) do + {_tag, attributes, _children} = html_node data = Enum.into(attributes, %{}, fn {name, value} -> From 551c3d9391c0eeb282d750979b8eef5025c41482 Mon Sep 17 00:00:00 2001 From: sxsdv1 Date: Tue, 1 Jan 2019 22:16:46 +0100 Subject: [PATCH 3/9] Split create activity specifics from update_outbox --- .../activity_pub/activity_pub_controller.ex | 42 ++++++++++++------- .../activity_pub_controller_test.exs | 17 ++++++++ 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/lib/pleroma/web/activity_pub/activity_pub_controller.ex b/lib/pleroma/web/activity_pub/activity_pub_controller.ex index fc7972eaf..d23c54933 100644 --- a/lib/pleroma/web/activity_pub/activity_pub_controller.ex +++ b/lib/pleroma/web/activity_pub/activity_pub_controller.ex @@ -165,9 +165,29 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubController do end end + def handle_user_activity(user, %{"type" => "Create"} = params) do + object = + params["object"] + |> Map.merge(Map.take(params, ["to", "cc"])) + |> Map.put("attributedTo", user.ap_id()) + |> Transmogrifier.fix_object() + + ActivityPub.create(%{ + to: params["to"], + actor: user, + context: object["context"], + object: object, + additional: Map.take(params, ["cc"]) + }) + end + + def handle_user_activity(_, _) do + {:error, "Unhandled activity type"} + end + def update_outbox( %{assigns: %{user: user}} = conn, - %{"nickname" => nickname, "type" => "Create"} = params + %{"nickname" => nickname} = params ) do if nickname == user.nickname do actor = user.ap_id() @@ -178,24 +198,16 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubController do |> Map.put("actor", actor) |> Transmogrifier.fix_addressing() - object = - params["object"] - |> Map.merge(Map.take(params, ["to", "cc"])) - |> Map.put("attributedTo", actor) - |> Transmogrifier.fix_object() - - with {:ok, %Activity{} = activity} <- - ActivityPub.create(%{ - to: params["to"], - actor: user, - context: object["context"], - object: object, - additional: Map.take(params, ["cc"]) - }) do + with {:ok, %Activity{} = activity} <- handle_user_activity(user, params) do conn |> put_status(:created) |> put_resp_header("location", activity.data["id"]) |> json(activity.data) + else + {:error, message} -> + conn + |> put_status(:bad_request) + |> json(message) end else conn diff --git a/test/web/activity_pub/activity_pub_controller_test.exs b/test/web/activity_pub/activity_pub_controller_test.exs index cb95e0e09..77dc96617 100644 --- a/test/web/activity_pub/activity_pub_controller_test.exs +++ b/test/web/activity_pub/activity_pub_controller_test.exs @@ -192,6 +192,23 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do result = json_response(conn, 201) assert Activity.get_by_ap_id(result["id"]) end + + test "it rejects an incoming activity with bogus type", %{conn: conn} do + data = File.read!("test/fixtures/activitypub-client-post-activity.json") |> Poison.decode!() + user = insert(:user) + + data = + data + |> Map.put("type", "BadType") + + conn = + conn + |> assign(:user, user) + |> put_req_header("content-type", "application/activity+json") + |> post("/users/#{user.nickname}/outbox", data) + + assert json_response(conn, 400) + end end describe "/users/:nickname/followers" do From 4e1cc2bab6ec76af1b6de986561a82887d18f366 Mon Sep 17 00:00:00 2001 From: sxsdv1 Date: Tue, 1 Jan 2019 23:19:40 +0100 Subject: [PATCH 4/9] Implement delete activity --- .../activity_pub/activity_pub_controller.ex | 10 ++++ .../activity_pub_controller_test.exs | 49 ++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/web/activity_pub/activity_pub_controller.ex b/lib/pleroma/web/activity_pub/activity_pub_controller.ex index d23c54933..a3f736fee 100644 --- a/lib/pleroma/web/activity_pub/activity_pub_controller.ex +++ b/lib/pleroma/web/activity_pub/activity_pub_controller.ex @@ -181,6 +181,16 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubController do }) end + def handle_user_activity(user, %{"type" => "Delete"} = params) do + with %Object{} = object <- Object.normalize(params["object"]), + true <- user.info.is_moderator || user.ap_id == object.data["actor"], + {:ok, delete} <- ActivityPub.delete(object) do + {:ok, delete} + else + _ -> {:error, "Can't delete object"} + end + end + def handle_user_activity(_, _) do {:error, "Unhandled activity type"} end diff --git a/test/web/activity_pub/activity_pub_controller_test.exs b/test/web/activity_pub/activity_pub_controller_test.exs index 77dc96617..620e03674 100644 --- a/test/web/activity_pub/activity_pub_controller_test.exs +++ b/test/web/activity_pub/activity_pub_controller_test.exs @@ -6,7 +6,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do use Pleroma.Web.ConnCase import Pleroma.Factory alias Pleroma.Web.ActivityPub.{UserView, ObjectView} - alias Pleroma.{Repo, User} + alias Pleroma.{Object, Repo, User} alias Pleroma.Activity setup_all do @@ -179,7 +179,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do assert json_response(conn, 403) end - test "it inserts an incoming activity into the database", %{conn: conn} do + test "it inserts an incoming create activity into the database", %{conn: conn} do data = File.read!("test/fixtures/activitypub-client-post-activity.json") |> Poison.decode!() user = insert(:user) @@ -209,6 +209,51 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do assert json_response(conn, 400) end + + test "it erects a tombstone when receiving a delete activity", %{conn: conn} do + note_activity = insert(:note_activity) + user = User.get_cached_by_ap_id(note_activity.data["actor"]) + + data = %{ + type: "Delete", + object: %{ + id: note_activity.data["object"]["id"] + } + } + + conn = + conn + |> assign(:user, user) + |> put_req_header("content-type", "application/activity+json") + |> post("/users/#{user.nickname}/outbox", data) + + result = json_response(conn, 201) + assert Activity.get_by_ap_id(result["id"]) + + object = Object.get_by_ap_id(note_activity.data["object"]["id"]) + assert object + assert object.data["type"] == "Tombstone" + end + + test "it rejects delete activity of object from other actor", %{conn: conn} do + note_activity = insert(:note_activity) + user = insert(:user) + + data = %{ + type: "Delete", + object: %{ + id: note_activity.data["object"]["id"] + } + } + + conn = + conn + |> assign(:user, user) + |> put_req_header("content-type", "application/activity+json") + |> post("/users/#{user.nickname}/outbox", data) + + assert json_response(conn, 400) + end end describe "/users/:nickname/followers" do From 48e81d3d40d334bccb8438c61ab6b307ddb1392f Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Wed, 2 Jan 2019 17:02:50 +0300 Subject: [PATCH 5/9] Add RichMediaController and tests --- .../controllers/rich_media_controller.ex | 17 ++++++ lib/pleroma/web/rich_media/data.ex | 3 -- lib/pleroma/web/rich_media/parser.ex | 14 ++++- lib/pleroma/web/router.ex | 6 +++ .../rich_media_controller_test.exs | 54 +++++++++++++++++++ test/web/rich_media/parser_test.exs | 21 +++++--- 6 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 lib/pleroma/web/rich_media/controllers/rich_media_controller.ex delete mode 100644 lib/pleroma/web/rich_media/data.ex create mode 100644 test/web/rich_media/controllers/rich_media_controller_test.exs diff --git a/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex new file mode 100644 index 000000000..91019961d --- /dev/null +++ b/lib/pleroma/web/rich_media/controllers/rich_media_controller.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Web.RichMedia.RichMediaController do + use Pleroma.Web, :controller + + import Pleroma.Web.ControllerHelper, only: [json_response: 3] + + def parse(conn, %{"url" => url}) do + case Pleroma.Web.RichMedia.Parser.parse(url) do + {:ok, data} -> + conn + |> json_response(200, data) + + {:error, msg} -> + conn + |> json_response(404, msg) + end + end +end diff --git a/lib/pleroma/web/rich_media/data.ex b/lib/pleroma/web/rich_media/data.ex deleted file mode 100644 index 403d1d341..000000000 --- a/lib/pleroma/web/rich_media/data.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule Pleroma.Web.RichMedia.Data do - defstruct [:title, :type, :image, :url, :description] -end diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index d9c1684d5..477a38196 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -4,11 +4,23 @@ defmodule Pleroma.Web.RichMedia.Parser do def parse(url) do {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) - Enum.reduce_while(@parsers, %Pleroma.Web.RichMedia.Data{}, fn parser, acc -> + html |> maybe_parse() |> get_parsed_data() + end + + defp maybe_parse(html) do + Enum.reduce_while(@parsers, %{}, fn parser, acc -> case parser.parse(html, acc) do {:ok, data} -> {:halt, data} {:error, _msg} -> {:cont, acc} end end) end + + defp get_parsed_data(data) when data == %{} do + {:error, "No metadata found"} + end + + defp get_parsed_data(data) do + {:ok, data} + end end diff --git a/lib/pleroma/web/router.ex b/lib/pleroma/web/router.ex index 1f929ee21..8df45bf4d 100644 --- a/lib/pleroma/web/router.ex +++ b/lib/pleroma/web/router.ex @@ -232,6 +232,12 @@ defmodule Pleroma.Web.Router do put("/settings", MastodonAPIController, :put_settings) end + scope "/api", Pleroma.Web.RichMedia do + pipe_through(:authenticated_api) + + get("/rich_media/parse", RichMediaController, :parse) + end + scope "/api/v1", Pleroma.Web.MastodonAPI do pipe_through(:api) get("/instance", MastodonAPIController, :masto_instance) diff --git a/test/web/rich_media/controllers/rich_media_controller_test.exs b/test/web/rich_media/controllers/rich_media_controller_test.exs new file mode 100644 index 000000000..37c82631f --- /dev/null +++ b/test/web/rich_media/controllers/rich_media_controller_test.exs @@ -0,0 +1,54 @@ +defmodule Pleroma.Web.RichMedia.RichMediaControllerTest do + use Pleroma.Web.ConnCase + import Pleroma.Factory + + setup do + Tesla.Mock.mock(fn + %{ + method: :get, + url: "http://example.com/ogp" + } -> + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + + %{method: :get, url: "http://example.com/empty"} -> + %Tesla.Env{status: 200, body: "hello"} + end) + + :ok + end + + describe "GET /api/rich_media/parse" do + setup do + user = insert(:user) + + [user: user] + end + + test "returns 404 if not metadata found", %{user: user} do + build_conn() + |> with_credentials(user.nickname, "test") + |> get("/api/rich_media/parse", %{"url" => "http://example.com/empty"}) + |> json_response(404) + end + + test "returns OGP metadata", %{user: user} do + response = + build_conn() + |> with_credentials(user.nickname, "test") + |> get("/api/rich_media/parse", %{"url" => "http://example.com/ogp"}) + |> json_response(200) + + assert response == %{ + "image" => "http://ia.media-imdb.com/images/rock.jpg", + "title" => "The Rock", + "type" => "video.movie", + "url" => "http://www.imdb.com/title/tt0117500/" + } + end + end + + defp with_credentials(conn, username, password) do + header_content = "Basic " <> Base.encode64("#{username}:#{password}") + put_req_header(conn, "authorization", header_content) + end +end diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs index bb0d663e9..caf81e9fa 100644 --- a/test/web/rich_media/parser_test.exs +++ b/test/web/rich_media/parser_test.exs @@ -8,19 +8,26 @@ defmodule Pleroma.Web.RichMedia.ParserTest do url: "http://example.com/ogp" } -> %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + + %{method: :get, url: "http://example.com/empty"} -> + %Tesla.Env{status: 200, body: "hello"} end) :ok end + test "returns error when no metadata present" do + assert {:error, _} = Pleroma.Web.RichMedia.Parser.parse("http://example.com/empty") + end + test "parses ogp" do assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") == - %Pleroma.Web.RichMedia.Data{ - description: nil, - image: "http://ia.media-imdb.com/images/rock.jpg", - title: "The Rock", - type: "video.movie", - url: "http://www.imdb.com/title/tt0117500/" - } + {:ok, + %{ + image: "http://ia.media-imdb.com/images/rock.jpg", + title: "The Rock", + type: "video.movie", + url: "http://www.imdb.com/title/tt0117500/" + }} end end From 846082e54fcb9dceda476282c54d8cd36986203c Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sat, 5 Jan 2019 00:19:46 +0100 Subject: [PATCH 6/9] Different caches based on the module. Remove scrubber version since it is not relevant anymore --- lib/pleroma/html.ex | 30 ++++--------------- .../web/mastodon_api/views/status_view.ex | 6 +++- .../web/twitter_api/views/activity_view.ex | 8 +++-- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 71db516e6..41d41e535 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -28,13 +28,13 @@ defmodule Pleroma.HTML do def filter_tags(html), do: filter_tags(html, nil) def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) - def get_cached_scrubbed_html_for_object(content, scrubbers, object) do - key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" + def get_cached_scrubbed_html_for_object(content, scrubbers, object, module) do + key = "#{module}#{generate_scrubber_signature(scrubbers)}|#{object.id}" Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) end - def get_cached_stripped_html_for_object(content, object) do - get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object) + def get_cached_stripped_html_for_object(content, object, module) do + get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object, module) end def ensure_scrubbed_html( @@ -50,15 +50,7 @@ defmodule Pleroma.HTML do defp generate_scrubber_signature(scrubbers) do Enum.reduce(scrubbers, "", fn scrubber, signature -> - # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) - version = - if Kernel.function_exported?(scrubber, :version, 0) do - scrubber.version - else - 0 - end - - "#{signature}#{to_string(scrubber)}#{version}" + "#{signature}#{to_string(scrubber)}" end) end end @@ -76,10 +68,6 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - def version do - 0 - end - Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -118,10 +106,6 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - def version do - 0 - end - @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -199,10 +183,6 @@ defmodule Pleroma.HTML.Transform.MediaProxy do alias Pleroma.Web.MediaProxy - def version do - 0 - end - def before_scrub(html), do: html def scrub_attribute("img", {"src", "http" <> target}) do diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 477ab3b5f..8e8fa8121 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do content = object |> render_content() - |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) + |> HTML.get_cached_scrubbed_html_for_object( + User.html_filter_policy(opts[:for]), + activity, + __MODULE__ + ) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 84f35ebf9..a0be5cfc5 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -245,14 +245,18 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do html = content - |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) + |> HTML.get_cached_scrubbed_html_for_object( + User.html_filter_policy(opts[:for]), + activity, + __MODULE__ + ) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r//, "\n") - |> HTML.get_cached_stripped_html_for_object(activity) + |> HTML.get_cached_stripped_html_for_object(activity, __MODULE__) end reply_parent = Activity.get_in_reply_to_activity(activity) From 1e2d58982ee6a74a249fc92fa2711f678a9f7464 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sat, 5 Jan 2019 00:25:31 +0100 Subject: [PATCH 7/9] oopsies --- lib/pleroma/html.ex | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 41d41e535..0c5b0f03f 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -34,7 +34,12 @@ defmodule Pleroma.HTML do end def get_cached_stripped_html_for_object(content, object, module) do - get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object, module) + get_cached_scrubbed_html_for_object( + content, + HtmlSanitizeEx.Scrubber.StripTags, + object, + module + ) end def ensure_scrubbed_html( From 0964c207eb184696355a2d8efd9d671dcc23ce66 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Fri, 4 Jan 2019 23:23:47 +0000 Subject: [PATCH 8/9] rich media: use cachex to avoid flooding remote servers --- lib/pleroma/application.ex | 11 +++++++++++ lib/pleroma/web/rich_media/parser.ex | 8 ++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index cb3e6b69b..ad2797209 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -63,6 +63,17 @@ defmodule Pleroma.Application do ], id: :cachex_object ), + worker( + Cachex, + [ + :rich_media_cache, + [ + default_ttl: :timer.minutes(120), + limit: 5000 + ] + ], + id: :cachex_rich_media + ), worker( Cachex, [ diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 477a38196..b88ed5371 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -2,9 +2,13 @@ defmodule Pleroma.Web.RichMedia.Parser do @parsers [Pleroma.Web.RichMedia.Parsers.OGP] def parse(url) do - {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) + Cachex.fetch!(:rich_media_cache, url, fn _ -> + {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) - html |> maybe_parse() |> get_parsed_data() + result = html |> maybe_parse() |> get_parsed_data() + + {:commit, result} + end) end defp maybe_parse(html) do From 487c00d36dee1b975ffc8fca8f0a5bb5510f71a3 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Fri, 4 Jan 2019 23:50:54 +0000 Subject: [PATCH 9/9] rich media: disable cachex in test mode --- lib/pleroma/web/rich_media/parser.ex | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index b88ed5371..3746feaf6 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -1,14 +1,17 @@ defmodule Pleroma.Web.RichMedia.Parser do @parsers [Pleroma.Web.RichMedia.Parsers.OGP] - def parse(url) do - Cachex.fetch!(:rich_media_cache, url, fn _ -> - {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) + if Mix.env() == :test do + def parse(url), do: parse_url(url) + else + def parse(url), + do: {:commit, Cachex.fetch!(:rich_media_cache, url, fn _ -> parse_url(url) end)} + end - result = html |> maybe_parse() |> get_parsed_data() + defp parse_url(url) do + {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url) - {:commit, result} - end) + html |> maybe_parse() |> get_parsed_data() end defp maybe_parse(html) do