diff --git a/lib/service/rich_media/parsers/meta_tags_parser.ex b/lib/service/rich_media/parsers/meta_tags_parser.ex
index 2ac44a78..bd383db7 100644
--- a/lib/service/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/service/rich_media/parsers/meta_tags_parser.ex
@@ -7,12 +7,28 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
   @moduledoc """
   Module to parse meta tags data in HTML pages
   """
-  def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
+
+  @doc """
+  Collects the `<meta>` tags whose `key_name` attribute starts with `prefix:` and merges
+  them into `data`.
+
+  Only tags whose prefix-stripped key is listed in `allowed_attributes` are kept; with the
+  default empty list nothing is kept. `key_name` and `value_name` may be atoms or strings.
+  """
+  def parse(
+        html,
+        data,
+        prefix,
+        error_message,
+        key_name,
+        value_name \\ :content,
+        allowed_attributes \\ []
+      ) do
     meta_data =
       html
       |> get_elements(key_name, prefix)
       |> Enum.reduce(data, fn el, acc ->
-        attributes = normalize_attributes(el, prefix, key_name, value_name)
+        attributes = normalize_attributes(el, prefix, key_name, value_name, allowed_attributes)
 
         Map.merge(acc, attributes)
       end)
@@ -27,18 +43,25 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
   end
 
   defp get_elements(html, key_name, prefix) do
-    html |> Floki.find("meta[#{key_name}^='#{prefix}:']")
+    html |> Floki.find("meta[#{to_string(key_name)}^='#{prefix}:']")
   end
 
-  defp normalize_attributes(html_node, prefix, key_name, value_name) do
+  defp normalize_attributes(html_node, prefix, key_name, value_name, allowed_attributes) do
     {_tag, attributes, _children} = html_node
 
     data =
-      Enum.into(attributes, %{}, fn {name, value} ->
+      attributes
+      |> Enum.into(%{}, fn {name, value} ->
         {name, String.trim_leading(value, "#{prefix}:")}
       end)
 
-    %{String.to_atom(data[key_name]) => data[value_name]}
+    # Only allow-listed keys are converted, so no atom is ever created from page content.
+    # When `allowed_attributes` holds atoms, `String.to_existing_atom/1` cannot raise here.
+    if data[to_string(key_name)] in Enum.map(allowed_attributes, &to_string/1) do
+      %{String.to_existing_atom(data[to_string(key_name)]) => data[to_string(value_name)]}
+    else
+      %{}
+    end
   end
 
   defp maybe_put_title(%{title: _} = meta, _), do: meta
diff --git a/lib/service/rich_media/parsers/oembed_parser.ex b/lib/service/rich_media/parsers/oembed_parser.ex
index dcc2d3e0..ce450d0d 100644
--- a/lib/service/rich_media/parsers/oembed_parser.ex
+++ b/lib/service/rich_media/parsers/oembed_parser.ex
@@ -41,10 +41,33 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OEmbed do
     {:ok, Enum.into(attributes, %{})["href"]}
   end
 
+  @oembed_allowed_attributes [
+    :type,
+    :version,
+    :html,
+    :width,
+    :height,
+    :title,
+    :author_name,
+    :author_url,
+    :provider_name,
+    :provider_url,
+    :cache_age,
+    :thumbnail_url,
+    :thumbnail_width,
+    :thumbnail_height,
+    :url
+  ]
+
   defp get_oembed_data(url) do
     with {:ok, %{body: json}} <- Tesla.get(url, opts: @http_options),
          {:ok, data} <- Jason.decode(json),
-         data <- data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end) do
+         data <-
+           data
+           # Filter on the raw string keys first: converting an unknown key with
+           # `String.to_existing_atom/1` would raise `ArgumentError`.
+           |> Map.take(Enum.map(@oembed_allowed_attributes, &to_string/1))
+           |> Map.new(fn {k, v} -> {String.to_existing_atom(k), v} end) do
       {:ok, data}
     end
   end
diff --git a/lib/service/rich_media/parsers/ogp.ex b/lib/service/rich_media/parsers/ogp.ex
index 7bfcf88e..531f79bf 100644
--- a/lib/service/rich_media/parsers/ogp.ex
+++ b/lib/service/rich_media/parsers/ogp.ex
@@ -10,6 +10,26 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
   require Logger
   alias Mobilizon.Service.RichMedia.Parsers.MetaTagsParser
 
+  @opengraph_properties [
+    :title,
+    :type,
+    :image,
+    :url,
+    :audio,
+    :description,
+    :determiner,
+    :locale,
+    :"locale:alternate",
+    :site_name,
+    :video,
+    :"image:url",
+    :"image:secure_url",
+    :"image:type",
+    :"image:width",
+    :"image:height",
+    :"image:alt"
+  ]
+
   def parse(html, data) do
     Logger.debug("Using OpenGraph card parser")
 
@@ -19,7 +39,9 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
              data,
              "og",
              "No OGP metadata found",
-             "property"
+             :property,
+             :content,
+             @opengraph_properties
            ) do
       data = transform_tags(data)
       Logger.debug("Data found with OpenGraph card parser")
@@ -29,9 +51,11 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
 
   defp transform_tags(data) do
     data
-    |> Map.put(:image_remote_url, Map.get(data, :image))
-    |> Map.put(:width, get_integer_value(data, :"image:width"))
-    |> Map.put(:height, get_integer_value(data, :"image:height"))
+    |> Enum.reject(fn {_, v} -> is_nil(v) end)
+    |> Map.new()
+    |> Map.update(:image_remote_url, Map.get(data, :image), & &1)
+    |> Map.update(:width, get_integer_value(data, :"image:width"), & &1)
+    |> Map.update(:height, get_integer_value(data, :"image:height"), & &1)
   end
 
   @spec get_integer_value(map(), atom()) :: integer() | nil
diff --git a/lib/service/rich_media/parsers/twitter_card.ex b/lib/service/rich_media/parsers/twitter_card.ex
index 2399d72a..6d2d5297 100644
--- a/lib/service/rich_media/parsers/twitter_card.ex
+++ b/lib/service/rich_media/parsers/twitter_card.ex
@@ -10,25 +10,51 @@ defmodule Mobilizon.Service.RichMedia.Parsers.TwitterCard do
   alias Mobilizon.Service.RichMedia.Parsers.MetaTagsParser
   require Logger
 
+  @twitter_card_properties [
+    :card,
+    :site,
+    :creator,
+    :title,
+    :description,
+    :image,
+    :"image:alt"
+  ]
+
   @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
   def parse(html, data) do
     Logger.debug("Using Twitter card parser")
 
-    res =
-      data
-      |> parse_name_attrs(html)
-      |> parse_property_attrs(html)
-
-    Logger.debug("Data found with Twitter card parser")
-    Logger.debug(inspect(res))
-    res
+    # The tagged result of `parse_name_attrs/2` is handed to `parse_property_attrs/2` as-is
+    # (it matches on `{_, data}`), so a miss on `name=` tags does not abort the lookup.
+    with {:ok, data} <- data |> parse_name_attrs(html) |> parse_property_attrs(html) do
+      data = transform_tags(data)
+      Logger.debug("Data found with Twitter card parser")
+      Logger.debug(inspect(data))
+      {:ok, data}
+    end
   end
 
   defp parse_name_attrs(data, html) do
-    MetaTagsParser.parse(html, data, "twitter", %{}, "name")
+    # Keys are compared after the `twitter:` prefix is stripped, hence the bare names.
+    MetaTagsParser.parse(html, data, "twitter", %{}, :name, :content, @twitter_card_properties)
   end
 
   defp parse_property_attrs({_, data}, html) do
-    MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+    MetaTagsParser.parse(
+      html,
+      data,
+      "twitter",
+      "No twitter card metadata found",
+      :property,
+      :content,
+      @twitter_card_properties
+    )
+  end
+
+  defp transform_tags(data) do
+    data
+    |> Enum.reject(fn {_, v} -> is_nil(v) end)
+    |> Map.new()
+    |> Map.update(:image_remote_url, Map.get(data, :image), & &1)
   end
 end