akkoma/lib/pleroma/web/mastodon_api/views/status_view.ex

869 lines
27 KiB
Elixir
Raw Normal View History

# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
2017-09-09 04:10:29 -06:00
defmodule Pleroma.Web.MastodonAPI.StatusView do
use Pleroma.Web, :view
2018-12-06 11:50:34 -07:00
require Pleroma.Constants
2018-12-06 21:53:14 -07:00
alias Pleroma.Activity
alias Pleroma.HTML
2021-05-12 15:16:10 -06:00
alias Pleroma.Maps
alias Pleroma.Object
2019-04-17 05:52:01 -06:00
alias Pleroma.Repo
2018-12-06 21:53:14 -07:00
alias Pleroma.User
alias Pleroma.UserRelationship
alias Pleroma.Web.CommonAPI
2017-09-15 09:50:47 -06:00
alias Pleroma.Web.CommonAPI.Utils
2018-12-06 21:53:14 -07:00
alias Pleroma.Web.MastodonAPI.AccountView
alias Pleroma.Web.MastodonAPI.PollView
2018-12-06 21:53:14 -07:00
alias Pleroma.Web.MastodonAPI.StatusView
2017-11-22 11:06:07 -07:00
alias Pleroma.Web.MediaProxy
alias Pleroma.Web.PleromaAPI.EmojiReactionController
require Logger
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
alias Pleroma.Web.RichMedia.Card
2018-03-27 10:18:24 -06:00
2020-06-24 05:29:08 -06:00
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
# This is a naive way to do this, just spawning a process per activity
# to fetch the preview. However it should be fine considering
# pagination is restricted to 40 activities at a time
defp fetch_rich_media_for_activities(activities) do
2022-12-30 20:53:52 -07:00
Enum.each(activities, fn activity ->
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
spawn(fn -> Card.get_by_activity(activity) end)
end)
end
2018-03-27 10:18:24 -06:00
# TODO: Add cached version.
defp get_replied_to_activities([]), do: %{}
2018-03-27 10:18:24 -06:00
defp get_replied_to_activities(activities) do
activities
|> Enum.map(fn
%{data: %{"type" => "Create"}} = activity ->
object = Object.normalize(activity, fetch: false)
object && object.data["inReplyTo"] != "" && object.data["inReplyTo"]
2018-03-30 07:01:53 -06:00
_ ->
nil
2018-03-27 10:18:24 -06:00
end)
2018-03-30 07:01:53 -06:00
|> Enum.filter(& &1)
|> Activity.create_by_object_ap_id_with_object()
2018-03-30 07:01:53 -06:00
|> Repo.all()
|> Enum.reduce(%{}, fn activity, acc ->
object = Object.normalize(activity, fetch: false)
if object, do: Map.put(acc, object.data["id"], activity), else: acc
2018-03-30 07:01:53 -06:00
end)
2018-03-27 10:18:24 -06:00
end
2017-09-09 04:10:29 -06:00
# DEPRECATED This field seems to be a left-over from the StatusNet era.
# If your application uses `pleroma.conversation_id`: this field is deprecated.
# It is currently stubbed instead by doing a CRC32 of the context, and
# clearing the MSB to avoid overflow exceptions with signed integers on the
# different clients using this field (Java/Kotlin code, mostly; see Husky.)
# This should be removed in a future version of Pleroma. Pleroma-FE currently
# depends on this field, as well.
defp get_context_id(%{data: %{"context" => context}}) when is_binary(context) do
import Bitwise
:erlang.crc32(context)
|> band(bnot(0x8000_0000))
end
defp get_context_id(_), do: nil
# Check if the user reblogged this status
defp reblogged?(activity, %User{ap_id: ap_id}) do
with %Object{data: %{"announcements" => announcements}} when is_list(announcements) <-
Object.normalize(activity, fetch: false) do
ap_id in announcements
else
_ -> false
end
end
# False if the user is logged out
defp reblogged?(_activity, _user), do: false
2017-09-09 04:10:29 -06:00
def render("index.json", opts) do
Logger.debug("Rendering index")
reading_user = opts[:for]
# To do: check AdminAPIControllerTest on the reasons behind nil activities in the list
activities = Enum.filter(opts.activities, & &1)
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
# Start prefetching rich media before doing anything else
fetch_rich_media_for_activities(activities)
replied_to_activities = get_replied_to_activities(activities)
parent_activities =
activities
|> Enum.filter(&(&1.data["type"] == "Announce" && &1.data["object"]))
|> Enum.map(&Object.normalize(&1, fetch: false).data["id"])
|> Activity.create_by_object_ap_id()
|> Activity.with_preloaded_object(:left)
|> Activity.with_preloaded_bookmark(reading_user)
|> Activity.with_set_thread_muted_field(reading_user)
|> Repo.all()
relationships_opt =
cond do
Map.has_key?(opts, :relationships) ->
opts[:relationships]
is_nil(reading_user) ->
UserRelationship.view_relationships_option(nil, [])
true ->
# Note: unresolved users are filtered out
actors =
(activities ++ parent_activities)
|> Enum.map(&CommonAPI.get_user(&1.data["actor"], false))
|> Enum.filter(& &1)
UserRelationship.view_relationships_option(reading_user, actors, subset: :source_mutes)
end
opts =
opts
|> Map.put(:replied_to_activities, replied_to_activities)
|> Map.put(:parent_activities, parent_activities)
|> Map.put(:relationships, relationships_opt)
2018-03-30 07:01:53 -06:00
render_many(activities, StatusView, "show.json", opts)
2017-09-09 04:10:29 -06:00
end
2018-03-30 07:01:53 -06:00
def render(
"show.json",
%{activity: %{id: id, data: %{"type" => "Announce", "object" => _object}} = activity} =
opts
2018-03-30 07:01:53 -06:00
) do
Logger.debug("Rendering reblog #{id}")
user = CommonAPI.get_user(activity.data["actor"])
2017-09-17 05:54:14 -06:00
created_at = Utils.to_masto_date(activity.data["published"])
object = Object.normalize(activity, fetch: false)
2017-09-17 05:54:14 -06:00
reblogged_parent_activity =
if opts[:parent_activities] do
Activity.Queries.find_by_object_ap_id(
opts[:parent_activities],
object.data["id"]
)
else
Activity.create_by_object_ap_id(object.data["id"])
|> Activity.with_preloaded_bookmark(opts[:for])
|> Activity.with_set_thread_muted_field(opts[:for])
|> Repo.one()
end
reblog_rendering_opts = Map.put(opts, :activity, reblogged_parent_activity)
reblogged = render("show.json", reblog_rendering_opts)
2017-09-17 05:54:14 -06:00
favorited = opts[:for] && opts[:for].ap_id in (object.data["likes"] || [])
2019-04-22 03:16:19 -06:00
bookmarked = Activity.get_bookmark(reblogged_parent_activity, opts[:for]) != nil
2018-03-30 07:01:53 -06:00
mentions =
activity.recipients
|> Enum.map(fn ap_id -> User.get_cached_by_ap_id(ap_id) end)
|> Enum.filter(& &1)
|> Enum.map(fn user -> AccountView.render("mention.json", %{user: user}) end)
2017-09-17 05:54:14 -06:00
{pinned?, pinned_at} = pin_data(object, user)
lang = language(object)
2017-09-17 05:54:14 -06:00
%{
id: to_string(activity.id),
uri: object.data["id"],
url: object.data["id"],
account:
AccountView.render("show.json", %{
user: user,
for: opts[:for]
}),
2017-09-17 05:54:14 -06:00
in_reply_to_id: nil,
in_reply_to_account_id: nil,
reblog: reblogged,
2023-03-02 04:04:27 -07:00
content: "",
2017-09-17 05:54:14 -06:00
created_at: created_at,
reblogs_count: 0,
replies_count: 0,
2017-09-17 05:54:14 -06:00
favourites_count: 0,
reblogged: reblogged?(reblogged_parent_activity, opts[:for]),
favourited: present?(favorited),
bookmarked: present?(bookmarked),
2017-09-17 05:54:14 -06:00
muted: false,
pinned: pinned?,
2017-09-17 05:54:14 -06:00
sensitive: false,
spoiler_text: "",
visibility: get_visibility(activity),
2018-12-06 11:50:34 -07:00
media_attachments: reblogged[:media_attachments] || [],
2017-09-17 05:54:14 -06:00
mentions: mentions,
2018-12-06 11:50:34 -07:00
tags: reblogged[:tags] || [],
application: build_application(object.data["generator"]),
language: lang,
emojis: [],
pleroma: %{
local: activity.local,
pinned_at: pinned_at
}
2017-09-17 05:54:14 -06:00
}
end
def render("show.json", %{activity: %{id: id, data: %{"object" => _object}} = activity} = opts) do
Logger.debug("Rendering status #{id}")
with %Object{} = object <- Object.normalize(activity, fetch: false) do
user = CommonAPI.get_user(activity.data["actor"])
user_follower_address = user.follower_address
like_count = object.data["like_count"] || 0
announcement_count = object.data["announcement_count"] || 0
hashtags = Object.hashtags(object)
sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw")
tags = Object.tags(object)
tag_mentions =
tags
|> Enum.filter(fn tag -> is_map(tag) and tag["type"] == "Mention" end)
|> Enum.map(fn tag -> tag["href"] end)
2023-03-11 10:24:49 -07:00
to_data = if is_nil(object.data["to"]), do: [], else: object.data["to"]
mentions =
2023-03-11 10:24:49 -07:00
(to_data ++ tag_mentions)
|> Enum.uniq()
|> Enum.map(fn
Pleroma.Constants.as_public() -> nil
^user_follower_address -> nil
ap_id -> User.get_cached_by_ap_id(ap_id)
end)
|> Enum.filter(& &1)
|> Enum.map(fn user -> AccountView.render("mention.json", %{user: user}) end)
favorited = opts[:for] && opts[:for].ap_id in (object.data["likes"] || [])
bookmarked = Activity.get_bookmark(activity, opts[:for]) != nil
client_posted_this_activity = opts[:for] && user.id == opts[:for].id
expires_at =
with true <- client_posted_this_activity,
%Oban.Job{scheduled_at: scheduled_at} <-
Pleroma.Workers.PurgeExpiredActivity.get_expiration(activity.id) do
scheduled_at
else
_ -> nil
end
thread_muted? =
cond do
is_nil(opts[:for]) -> false
is_boolean(activity.thread_muted?) -> activity.thread_muted?
true -> CommonAPI.thread_muted?(opts[:for], activity)
end
attachment_data = object.data["attachment"] || []
attachments = render_many(attachment_data, StatusView, "attachment.json", as: :attachment)
created_at = Utils.to_masto_date(object.data["published"])
edited_at =
with %{"updated" => updated} <- object.data,
date <- Utils.to_masto_date(updated),
true <- date != "" do
date
else
_ ->
nil
end
reply_to = get_reply_to(activity, opts)
reply_to_user = reply_to && CommonAPI.get_user(reply_to.data["actor"])
history_len =
1 +
(Object.Updater.history_for(object.data)
|> Map.get("orderedItems")
|> length())
# See render("history.json", ...) for more details
# Here the implicit index of the current content is 0
chrono_order = history_len - 1
content =
object
|> render_content()
2024-06-09 10:34:41 -06:00
quote_post =
if visible_for_user?(quote_activity, opts[:for]) and opts[:show_quote] != false do
quote_rendering_opts = Map.merge(opts, %{activity: quote_activity, show_quote: false})
render("show.json", quote_rendering_opts)
else
nil
end
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
2024-06-09 10:34:41 -06:00
content =
object
|> render_content()
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
2024-06-09 10:34:41 -06:00
content_html =
content
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
User.html_filter_policy(opts[:for]),
activity,
"mastoapi:content:#{chrono_order}"
)
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
2024-06-09 10:34:41 -06:00
content_plaintext =
content
|> Activity.HTML.get_cached_stripped_html_for_activity(
activity,
"mastoapi:content:#{chrono_order}"
)
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
2024-06-09 10:34:41 -06:00
summary = object.data["summary"] || ""
card =
case Card.get_by_activity(activity) do
%Card{} = result -> render("card.json", result)
_ -> nil
end
url =
if user.local do
Pleroma.Web.Router.Helpers.o_status_url(Pleroma.Web.Endpoint, :notice, activity)
else
object.data["url"] || object.data["external_url"] || object.data["id"]
end
direct_conversation_id =
with {_, nil} <- {:direct_conversation_id, opts[:direct_conversation_id]},
{_, true} <- {:include_id, opts[:with_direct_conversation_id]},
{_, %User{} = for_user} <- {:for_user, opts[:for]} do
Activity.direct_conversation_id(activity, for_user)
else
{:direct_conversation_id, participation_id} when is_integer(participation_id) ->
participation_id
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
2024-06-09 10:34:41 -06:00
_e ->
nil
end
emoji_reactions =
object
|> Object.get_emoji_reactions()
|> EmojiReactionController.filter_allowed_users(
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
opts[:for],
2024-06-09 10:34:41 -06:00
Map.get(opts, :with_muted, false)
)
2024-06-09 10:34:41 -06:00
|> Stream.map(fn {emoji, users, url} ->
build_emoji_map(emoji, users, url, opts[:for])
end)
|> Enum.to_list()
# Status muted state (would do 1 request per status unless user mutes are preloaded)
muted =
thread_muted? ||
UserRelationship.exists?(
get_in(opts, [:relationships, :user_relationships]),
:mute,
opts[:for],
user,
fn for_user, user -> User.mutes?(for_user, user) end
)
content_plaintext =
content
|> Activity.HTML.get_cached_stripped_html_for_activity(
activity,
"mastoapi:content:#{chrono_order}"
)
2022-06-10 06:41:08 -06:00
summary = object.data["summary"] || ""
card = render("card.json", Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity))
url =
if user.local do
url(~p[/notice/#{activity}])
else
object.data["url"] || object.data["external_url"] || object.data["id"]
end
direct_conversation_id =
with {_, nil} <- {:direct_conversation_id, opts[:direct_conversation_id]},
{_, true} <- {:include_id, opts[:with_direct_conversation_id]},
{_, %User{} = for_user} <- {:for_user, opts[:for]} do
Activity.direct_conversation_id(activity, for_user)
else
{:direct_conversation_id, participation_id} when is_integer(participation_id) ->
participation_id
_e ->
nil
end
emoji_reactions =
object.data
|> Map.get("reactions", [])
|> EmojiReactionController.filter_allowed_users(
opts[:for],
Map.get(opts, :with_muted, false)
)
|> Stream.map(fn {emoji, users, url} ->
build_emoji_map(emoji, users, url, opts[:for])
end)
|> Enum.to_list()
# Status muted state (would do 1 request per status unless user mutes are preloaded)
muted =
thread_muted? ||
UserRelationship.exists?(
get_in(opts, [:relationships, :user_relationships]),
:mute,
opts[:for],
user,
fn for_user, user -> User.mutes?(for_user, user) end
)
{pinned?, pinned_at} = pin_data(object, user)
quote = Activity.get_quoted_activity_from_object(object)
lang = language(object)
%{
id: to_string(activity.id),
uri: object.data["id"],
url: url,
account:
AccountView.render("show.json", %{
user: user,
for: opts[:for]
}),
in_reply_to_id: reply_to && to_string(reply_to.id),
in_reply_to_account_id: reply_to_user && to_string(reply_to_user.id),
reblog: nil,
card: card,
content: content_html,
text: opts[:with_source] && get_source_text(object.data["source"]),
created_at: created_at,
edited_at: edited_at,
reblogs_count: announcement_count,
replies_count: object.data["repliesCount"] || 0,
favourites_count: like_count,
reblogged: reblogged?(activity, opts[:for]),
favourited: present?(favorited),
bookmarked: present?(bookmarked),
muted: muted,
pinned: pinned?,
sensitive: sensitive,
spoiler_text: summary,
visibility: get_visibility(object),
media_attachments: attachments,
poll: render(PollView, "show.json", object: object, for: opts[:for]),
mentions: mentions,
tags: build_tags(tags),
application: build_application(object.data["generator"]),
language: lang,
emojis: build_emojis(object.data["emoji"]),
quote_id: if(quote, do: quote.id, else: nil),
quote: maybe_render_quote(quote, opts),
2020-06-24 05:29:08 -06:00
emoji_reactions: emoji_reactions,
pleroma: %{
local: activity.local,
conversation_id: get_context_id(activity),
context: object.data["context"],
in_reply_to_account_acct: reply_to_user && reply_to_user.nickname,
content: %{"text/plain" => content_plaintext},
spoiler_text: %{"text/plain" => summary},
expires_at: expires_at,
direct_conversation_id: direct_conversation_id,
thread_muted: thread_muted?,
emoji_reactions: emoji_reactions,
parent_visible: visible_for_user?(reply_to, opts[:for]),
pinned_at: pinned_at
},
akkoma: %{
source: object.data["source"]
}
}
else
nil -> nil
end
2017-09-09 04:10:29 -06:00
end
2017-09-10 03:51:01 -06:00
def render("show.json", _) do
nil
end
def render("history.json", %{activity: %{data: %{"object" => _object}} = activity} = opts) do
Logger.debug("Rendering history for #{activity.id}")
object = Object.normalize(activity, fetch: false)
hashtags = Object.hashtags(object)
user = CommonAPI.get_user(activity.data["actor"])
past_history =
Object.Updater.history_for(object.data)
|> Map.get("orderedItems")
|> Enum.map(&Map.put(&1, "id", object.data["id"]))
|> Enum.map(&%Object{data: &1, id: object.id})
history =
[object | past_history]
# Mastodon expects the original to be at the first
|> Enum.reverse()
|> Enum.with_index()
|> Enum.map(fn {object, chrono_order} ->
%{
# The history is prepended every time there is a new edit.
# In chrono_order, the oldest item is always at 0, and so on.
# The chrono_order is an invariant kept between edits.
chrono_order: chrono_order,
object: object
}
end)
individual_opts =
opts
|> Map.put(:as, :item)
|> Map.put(:user, user)
|> Map.put(:hashtags, hashtags)
render_many(history, StatusView, "history_item.json", individual_opts)
end
def render(
"history_item.json",
%{
activity: activity,
user: user,
item: %{object: object, chrono_order: chrono_order},
hashtags: hashtags
} = opts
) do
sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw")
attachment_data = object.data["attachment"] || []
attachments = render_many(attachment_data, StatusView, "attachment.json", as: :attachment)
created_at = Utils.to_masto_date(object.data["updated"] || object.data["published"])
content =
object
|> render_content()
content_html =
content
|> Activity.HTML.get_cached_scrubbed_html_for_activity(
User.html_filter_policy(opts[:for]),
activity,
"mastoapi:content:#{chrono_order}"
)
summary = object.data["summary"] || ""
%{
account:
AccountView.render("show.json", %{
user: user,
for: opts[:for]
}),
content: content_html,
sensitive: sensitive,
spoiler_text: summary,
created_at: created_at,
media_attachments: attachments,
emojis: build_emojis(object.data["emoji"]),
poll: render(PollView, "show.json", object: object, for: opts[:for])
}
end
def render("source.json", %{activity: %{data: %{"object" => _object}} = activity} = _opts) do
object = Object.normalize(activity, fetch: false)
%{
id: activity.id,
text: get_source_text(Map.get(object.data, "source", "")),
spoiler_text: Map.get(object.data, "summary", ""),
content_type: get_source_content_type(object.data["source"])
}
end
RichMedia refactor Rich Media parsing was previously handled on-demand with a 2 second HTTP request timeout and retained only in Cachex. Every time a Pleroma instance is restarted it will have to request and parse the data for each status with a URL detected. When fetching a batch of statuses they were processed in parallel to attempt to keep the maximum latency at 2 seconds, but often resulted in a timeline appearing to hang during loading due to a URL that could not be successfully reached. URLs which had images links that expire (Amazon AWS) were parsed and inserted with a TTL to ensure the image link would not break. Rich Media data is now cached in the database and fetched asynchronously. Cachex is used as a read-through cache. When the data becomes available we stream an update to the clients. If the result is returned quickly the experience is almost seamless. Activities were already processed for their Rich Media data during ingestion to warm the cache, so users should not normally encounter the asynchronous loading of the Rich Media data. Implementation notes: - The async worker is a Task with a globally unique process name to prevent duplicate processing of the same URL - The Task will attempt to fetch the data 3 times with increasing sleep time between attempts - The HTTP request obeys the default HTTP request timeout value instead of 2 seconds - URLs that cannot be successfully parsed due to an unexpected error receives a negative cache entry for 15 minutes - URLs that fail with an expected error will receive a negative cache with no TTL - Activities that have no detected URLs insert a nil value in the Cachex :scrubber_cache so we do not repeat parsing the object content with Floki every time the activity is rendered - Expiring image URLs are handled with an Oban job - There is no automatic cleanup of the Rich Media data in the database, but it is safe to delete at any time - The post draft/preview feature makes the URL processing synchronous so the rendered post preview will have an accurate rendering Overall performance of timelines and creating new posts which contain URLs is greatly improved.
2024-02-11 14:11:52 -07:00
def render("card.json", %Card{fields: rich_media}) do
page_url_data = URI.parse(rich_media["url"])
2019-02-02 01:38:37 -07:00
2019-02-02 02:24:24 -07:00
page_url = page_url_data |> to_string
image_url_data =
2020-06-09 11:49:24 -06:00
if is_binary(rich_media["image"]) do
URI.parse(rich_media["image"])
else
nil
end
2019-02-02 02:24:24 -07:00
image_url = build_image_url(image_url_data, page_url_data)
%{
type: "link",
provider_name: page_url_data.host,
provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
url: page_url,
2019-02-02 01:38:37 -07:00
image: image_url |> MediaProxy.url(),
2020-06-09 11:49:24 -06:00
title: rich_media["title"] || "",
description: rich_media["description"] || "",
pleroma: %{
opengraph: rich_media
}
}
end
2019-09-23 13:37:30 -06:00
def render("card.json", _), do: nil
2017-09-10 03:51:01 -06:00
def render("attachment.json", %{attachment: attachment}) do
2018-06-13 16:26:37 -06:00
[attachment_url | _] = attachment["url"]
media_type = attachment_url["mediaType"] || attachment_url["mimeType"] || "image"
href = attachment_url["href"] |> MediaProxy.url()
href_preview = attachment_url["href"] |> MediaProxy.preview_url()
2021-05-12 15:16:10 -06:00
meta = render("attachment_meta.json", %{attachment: attachment})
2017-09-10 03:51:01 -06:00
2018-03-30 07:01:53 -06:00
type =
cond do
String.contains?(media_type, "image") -> "image"
String.contains?(media_type, "video") -> "video"
String.contains?(media_type, "audio") -> "audio"
true -> "unknown"
end
2017-09-10 03:51:01 -06:00
attachment_id =
with {_, ap_id} when is_binary(ap_id) <- {:ap_id, attachment["id"]},
{_, %Object{data: _object_data, id: object_id}} <-
{:object, Object.get_by_ap_id(ap_id)} do
to_string(object_id)
else
_ ->
<<hash_id::signed-32, _rest::binary>> = :crypto.hash(:md5, href)
to_string(attachment["id"] || hash_id)
end
2017-09-10 03:51:01 -06:00
%{
id: attachment_id,
url: href,
2017-09-10 03:51:01 -06:00
remote_url: href,
preview_url: href_preview,
text_url: href,
type: type,
description: attachment["name"],
2020-11-11 11:51:13 -07:00
pleroma: %{mime_type: media_type},
blurhash: attachment["blurhash"]
2017-09-10 03:51:01 -06:00
}
2021-05-12 15:16:10 -06:00
|> Maps.put_if_present(:meta, meta)
2017-09-10 03:51:01 -06:00
end
def render("attachment_meta.json", %{
attachment: %{"url" => [%{"width" => width, "height" => height} | _]}
})
when is_integer(width) and is_integer(height) do
%{
original: %{
width: width,
height: height,
aspect: width / height
}
}
2017-09-10 03:51:01 -06:00
end
2021-05-12 15:16:10 -06:00
def render("attachment_meta.json", _), do: nil
def render("context.json", %{activity: activity, activities: activities, user: user}) do
Logger.debug("Rendering context for #{activity.id}")
%{ancestors: ancestors, descendants: descendants} =
activities
|> Enum.reverse()
|> Enum.group_by(fn %{id: id} -> if id < activity.id, do: :ancestors, else: :descendants end)
|> Map.put_new(:ancestors, [])
|> Map.put_new(:descendants, [])
%{
ancestors: render("index.json", for: user, activities: ancestors, as: :activity),
descendants: render("index.json", for: user, activities: descendants, as: :activity)
}
end
def get_reply_to(activity, %{replied_to_activities: replied_to_activities}) do
object = Object.normalize(activity, fetch: false)
with nil <- replied_to_activities[object.data["inReplyTo"]] do
2019-04-11 20:21:32 -06:00
# If user didn't participate in the thread
Activity.get_in_reply_to_activity(activity)
end
end
def get_reply_to(%{data: %{"object" => _object}} = activity, _) do
object = Object.normalize(activity, fetch: false)
if object.data["inReplyTo"] && object.data["inReplyTo"] != "" do
Activity.get_create_by_object_ap_id(object.data["inReplyTo"])
else
nil
end
end
2020-08-20 10:41:42 -06:00
def render_content(%{data: %{"name" => name}} = object) when not is_nil(name) and name != "" do
url = object.data["url"] || object.data["id"]
2020-08-20 10:41:42 -06:00
"<p><a href=\"#{url}\">#{name}</a></p>#{object.data["content"]}"
end
def render_content(object), do: object.data["content"] || ""
2018-06-24 00:34:44 -06:00
2018-12-13 05:13:02 -07:00
@doc """
Builds a dictionary tags.
## Examples
iex> Pleroma.Web.MastodonAPI.StatusView.build_tags(["fediverse", "nextcloud"])
[{"name": "fediverse", "url": "/tag/fediverse"},
{"name": "nextcloud", "url": "/tag/nextcloud"}]
"""
2018-12-14 12:56:37 -07:00
@spec build_tags(list(any())) :: list(map())
2018-12-13 05:13:02 -07:00
def build_tags(object_tags) when is_list(object_tags) do
object_tags
|> Enum.filter(&is_binary/1)
|> Enum.map(&%{name: &1, url: "#{Pleroma.Web.Endpoint.url()}/tag/#{URI.encode(&1)}"})
end
2018-12-13 05:13:02 -07:00
def build_tags(_), do: []
2018-06-24 00:34:44 -06:00
2018-12-06 11:50:34 -07:00
@doc """
Builds list emojis.
2018-12-06 11:50:34 -07:00
Arguments: `nil` or list tuple of name and url.
Returns list emojis.
## Examples
2018-12-06 11:50:34 -07:00
iex> Pleroma.Web.MastodonAPI.StatusView.build_emojis([{"2hu", "corndog.png"}])
[%{shortcode: "2hu", static_url: "corndog.png", url: "corndog.png", visible_in_picker: false}]
"""
@spec build_emojis(nil | list(tuple())) :: list(map())
def build_emojis(nil), do: []
def build_emojis(emojis) do
emojis
|> Enum.map(fn {name, url} ->
name = HTML.strip_tags(name)
url =
url
|> HTML.strip_tags()
|> MediaProxy.url()
%{shortcode: name, url: url, static_url: url, visible_in_picker: false}
end)
end
2018-12-06 11:50:34 -07:00
defp present?(nil), do: false
defp present?(false), do: false
defp present?(_), do: true
2019-01-08 01:27:02 -07:00
defp pin_data(%Object{data: %{"id" => object_id}}, %User{pinned_objects: pinned_objects}) do
if pinned_at = pinned_objects[object_id] do
{true, Utils.to_masto_date(pinned_at)}
else
{false, nil}
end
end
2022-06-07 19:42:44 -06:00
defp build_emoji_map(emoji, users, url, current_user) do
%{
name: Pleroma.Web.PleromaAPI.EmojiReactionView.emoji_name(emoji, url),
count: length(users),
2022-06-08 10:19:42 -06:00
url: MediaProxy.url(url),
me: !!(current_user && current_user.ap_id in users),
account_ids: Enum.map(users, fn user -> User.get_cached_by_ap_id(user).id end)
}
end
@spec build_application(map() | nil) :: map() | nil
2021-03-02 10:37:37 -07:00
defp build_application(%{"type" => _type, "name" => name, "url" => url}),
do: %{name: name, website: url}
defp build_application(_), do: nil
# Workaround for Elixir issue #10771
# Avoid applying URI.merge unless necessary
# TODO: revert to always attempting URI.merge(image_url_data, page_url_data)
# when Elixir 1.12 is the minimum supported version
@spec build_image_url(struct() | nil, struct()) :: String.t() | nil
defp build_image_url(
%URI{scheme: image_scheme, host: image_host} = image_url_data,
%URI{} = _page_url_data
)
when not is_nil(image_scheme) and not is_nil(image_host) do
image_url_data |> to_string
end
defp build_image_url(%URI{} = image_url_data, %URI{} = page_url_data) do
URI.merge(page_url_data, image_url_data) |> to_string
end
defp build_image_url(_, _), do: nil
defp maybe_render_quote(nil, _), do: nil
defp maybe_render_quote(quote, opts) do
with %User{} = quoted_user <- User.get_cached_by_ap_id(quote.actor),
false <- Map.get(opts, :do_not_recurse, false),
true <- visible_for_user?(quote, opts[:for]),
false <- User.blocks?(opts[:for], quoted_user),
false <- User.mutes?(opts[:for], quoted_user) do
opts =
opts
|> Map.put(:activity, quote)
|> Map.put(:do_not_recurse, true)
render("show.json", opts)
else
_ -> nil
end
end
defp get_source_text(%{"content" => content} = _source) do
content
end
defp get_source_text(source) when is_binary(source) do
source
end
defp get_source_text(_) do
""
end
defp get_source_content_type(%{"mediaType" => type} = _source) do
type
end
defp get_source_content_type(_source) do
Utils.get_content_type(nil)
end
defp language(%Object{data: %{"contentMap" => contentMap}}) when is_map(contentMap) do
contentMap
|> Map.keys()
|> Enum.at(0)
end
defp language(_), do: nil
2017-09-09 04:10:29 -06:00
end