-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14 from echaritonidis/feature/feed-content-image
Feature/feed content image
- Loading branch information
Showing
12 changed files
with
257 additions
and
129 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 10 additions & 10 deletions
20
Server/Infrastructure/Services/Contracts/IExtractContent.cs
100755 → 100644
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,10 @@ | ||
using RssFeeder.Shared.Model; | ||
|
||
namespace RssFeeder.Server.Infrastructure.Services.Contracts
{
    /// <summary>
    /// Contract for a service that turns an XML string into a list of
    /// <see cref="FeedContent"/> entries.
    /// </summary>
    public interface IExtractContent
    {
        /// <summary>
        /// Extracts the content entries found in <paramref name="xmlContent"/>.
        /// </summary>
        /// <param name="xmlContent">The XML document, passed as a string.</param>
        /// <returns>The extracted entries.</returns>
        List<FeedContent> GetContentItems(string xmlContent);
    }
}
using RssFeeder.Shared.Model; | ||
|
||
namespace RssFeeder.Server.Infrastructure.Services.Contracts
{
    /// <summary>
    /// Contract for a service that asynchronously turns an XML string into a list of
    /// <see cref="FeedContent"/> entries.
    /// </summary>
    public interface IExtractContent
    {
        /// <summary>
        /// Extracts the content entries found in <paramref name="xmlContent"/>.
        /// </summary>
        /// <param name="xmlContent">The XML document, passed as a string.</param>
        /// <returns>A task that completes with the extracted entries.</returns>
        Task<List<FeedContent>> GetContentItems(string xmlContent);
    }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
namespace RssFeeder.Server.Infrastructure.Services.Contracts
{
    /// <summary>
    /// Contract for a service that produces a Base64 image string for a given address.
    /// </summary>
    public interface IExtractImage
    {
        /// <summary>
        /// Resolves an image for <paramref name="href"/> and returns it as a Base64 string.
        /// </summary>
        /// <param name="href">The address to resolve an image for.</param>
        /// <returns>A task that completes with the Base64 string.</returns>
        Task<string> GetImageBase64ByHref(string href);
    }
}
|
123 changes: 63 additions & 60 deletions
123
Server/Infrastructure/Services/Implementations/ExtractContent.cs
100755 → 100644
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,63 @@ | ||
using System.Xml.Linq; | ||
using RssFeeder.Server.Infrastructure.Services.Contracts; | ||
using RssFeeder.Server.Infrastructure.Utils; | ||
using RssFeeder.Shared.Extensions; | ||
using RssFeeder.Shared.Model; | ||
|
||
namespace RssFeeder.Server.Infrastructure.Services.Implementations
{
    /// <summary>
    /// Maps the <c>item</c> elements found under a document's <c>channel</c> elements
    /// to <see cref="FeedContent"/> instances.
    /// </summary>
    public class ExtractContent : IExtractContent
    {
        private readonly DateRegexUtil _dateRegexUtil;

        public ExtractContent(DateRegexUtil dateRegexUtil)
        {
            _dateRegexUtil = dateRegexUtil;
        }

        /// <summary>
        /// Parses <paramref name="xmlContent"/> and projects every <c>item</c> below a
        /// <c>channel</c> element into a <see cref="FeedContent"/>.
        /// </summary>
        /// <param name="xmlContent">The XML document to parse.</param>
        /// <returns>One entry per <c>item</c>, in document order.</returns>
        public List<FeedContent> GetContentItems(string xmlContent)
        {
            return XDocument.Parse(xmlContent)
                .Descendants("channel")
                .Descendants("item")
                .Select(ToFeedContent)
                .ToList();
        }

        /// <summary>
        /// Builds a single <see cref="FeedContent"/> from one <c>item</c> element.
        /// </summary>
        private FeedContent ToFeedContent(XElement item)
        {
            // Prefer <link>; fall back to <guid> when the link is missing or empty.
            var address = item.GetElement("link");

            if (string.IsNullOrEmpty(address))
            {
                address = item.GetElement("guid");
            }

            // Keep the raw pubDate unless the date regex matches, in which case
            // only the matched portion is stored.
            var rawDate = item.GetElement("pubDate");
            var dateMatch = _dateRegexUtil.IsMatch(rawDate);
            var publishedOn = dateMatch.Success ? dateMatch.Value : rawDate;

            return new FeedContent
            {
                Title = item.GetElement("title"),
                Link = address,
                Description = item.GetElement("description"),
                PubDate = publishedOn
            };
        }
    }
}
|
||
using System.Xml.Linq; | ||
using RssFeeder.Server.Infrastructure.Services.Contracts; | ||
using RssFeeder.Server.Infrastructure.Utils; | ||
using RssFeeder.Shared.Extensions; | ||
using RssFeeder.Shared.Model; | ||
|
||
namespace RssFeeder.Server.Infrastructure.Services.Implementations
{
    /// <summary>
    /// Maps the <c>item</c> elements found under a document's <c>channel</c> elements
    /// to <see cref="FeedContent"/> instances, including a Base64 image obtained
    /// through <see cref="IExtractImage"/> for each item's link.
    /// </summary>
    public class ExtractContent : IExtractContent
    {
        private readonly IExtractImage _extractImage;
        private readonly DateRegexUtil _dateRegexUtil;

        public ExtractContent(IExtractImage extractImage, DateRegexUtil dateRegexUtil)
        {
            _extractImage = extractImage;
            _dateRegexUtil = dateRegexUtil;
        }

        /// <summary>
        /// Parses <paramref name="xmlContent"/> and builds one <see cref="FeedContent"/>
        /// per <c>item</c> below a <c>channel</c> element.
        /// </summary>
        /// <param name="xmlContent">The XML document to parse.</param>
        /// <returns>
        /// A task that completes with the entries in document order. Items that have
        /// neither a <c>link</c> nor a <c>guid</c> get an empty <c>ImageBase64</c>.
        /// </returns>
        public async Task<List<FeedContent>> GetContentItems(string xmlContent)
        {
            List<FeedContent> result = new();

            var xml = XDocument.Parse(xmlContent);

            var channel = xml.Descendants("channel");
            var items = channel.Descendants("item");

            foreach (var item in items)
            {
                // Prefer <link>; fall back to <guid> when the link is missing or empty.
                var link = item.GetElement("link");

                if (string.IsNullOrEmpty(link))
                {
                    link = item.GetElement("guid");
                }

                // Keep the raw pubDate unless the date regex matches, in which case
                // only the matched portion is stored.
                var pubDate = item.GetElement("pubDate");
                var match = _dateRegexUtil.IsMatch(pubDate);

                if (match.Success)
                {
                    pubDate = match.Value;
                }

                // Without a link there is nothing to fetch; skip the image lookup
                // instead of handing an empty address to the image extractor, so a
                // single link-less item cannot fail the whole feed.
                var imageBase64 = string.IsNullOrEmpty(link)
                    ? string.Empty
                    : await _extractImage.GetImageBase64ByHref(link);

                result.Add
                (
                    new FeedContent
                    {
                        Title = item.GetElement("title"),
                        Link = link,
                        ImageBase64 = imageBase64,
                        Description = item.GetElement("description"),
                        PubDate = pubDate
                    }
                );
            }

            return result;
        }
    }
}
|
104 changes: 104 additions & 0 deletions
104
Server/Infrastructure/Services/Implementations/ExtractImage.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
using AngleSharp.Html.Dom; | ||
using AngleSharp.Html.Parser; | ||
using RssFeeder.Server.Infrastructure.Services.Contracts; | ||
using SixLabors.ImageSharp.Formats.Jpeg; | ||
using SixLabors.ImageSharp; | ||
using SixLabors.ImageSharp.Processing; | ||
|
||
namespace RssFeeder.Server.Infrastructure.Services.Implementations
{
    /// <summary>
    /// Downloads the page behind a link, picks a preview image from its markup,
    /// shrinks it to at most <see cref="MaxAllowedWidth"/> pixels wide and returns it
    /// as a Base64-encoded JPEG.
    /// </summary>
    public class ExtractImage : IExtractImage
    {
        private const int MaxAllowedWidth = 120;

        // Head lookups tried in order. 'twitter:image' is matched on both 'name' and
        // 'property' because the Twitter Cards markup uses the 'name' attribute.
        private static readonly (string Selector, string Attribute)[] HeadImageSources =
        {
            ("meta[property='og:image']", "content"),
            ("meta[name='twitter:image'], meta[property='twitter:image']", "content"),
            ("link[rel='image_src']", "href"),
        };

        private readonly IHttpClientFactory _httpClientFactory;

        public ExtractImage(IHttpClientFactory httpClientFactory)
        {
            _httpClientFactory = httpClientFactory;
        }

        /// <summary>
        /// Fetches the page at <paramref name="href"/> and returns its preview image as
        /// a Base64-encoded JPEG.
        /// </summary>
        /// <param name="href">Absolute http(s) address of the page to inspect.</param>
        /// <returns>
        /// The Base64 string, or <see cref="string.Empty"/> when the address is not a
        /// usable http(s) URL, the page cannot be fetched, no preview image is found,
        /// or the image cannot be downloaded or decoded.
        /// </returns>
        public async Task<string> GetImageBase64ByHref(string href)
        {
            var pageUri = ToHttpUri(href);

            if (pageUri is null)
            {
                return string.Empty;
            }

            var httpClient = _httpClientFactory.CreateClient();

            try
            {
                using var response = await httpClient.GetAsync(pageUri);

                if (!response.IsSuccessStatusCode)
                {
                    return string.Empty;
                }

                var html = await response.Content.ReadAsStringAsync();
                var document = new HtmlParser().ParseDocument(html);

                if (document.Head is null)
                {
                    return string.Empty;
                }

                // Resolve relative image paths against the address that was actually
                // served, which may differ from the requested one after redirects.
                var baseUri = response.RequestMessage?.RequestUri ?? pageUri;
                var imageUri = ToHttpUri(GetPreviewImageUrl(document), baseUri);

                if (imageUri is null)
                {
                    return string.Empty;
                }

                using var imageStream = await httpClient.GetStreamAsync(imageUri);

                return await ResizeImage(imageStream);
            }
            catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or ImageFormatException)
            {
                // The preview image is best-effort: an unreachable page, a timeout or
                // an undecodable image yields "no image" rather than an error.
                return string.Empty;
            }
        }

        /// <summary>
        /// Picks the preview image address from the document: the head sources in
        /// <see cref="HeadImageSources"/> first, then the first <c>img</c> that has a
        /// <c>src</c> attribute.
        /// </summary>
        /// <returns>The raw attribute value, or <c>null</c> when nothing usable is found.</returns>
        private static string? GetPreviewImageUrl(IHtmlDocument document)
        {
            var head = document.Head;

            if (head is not null)
            {
                foreach (var (selector, attribute) in HeadImageSources)
                {
                    var candidate = head.QuerySelector(selector)?.GetAttribute(attribute);

                    // An empty attribute counts as absent so the next source is tried.
                    if (!string.IsNullOrWhiteSpace(candidate))
                    {
                        return candidate;
                    }
                }
            }

            var firstImageSrc = document.QuerySelector("img[src]")?.GetAttribute("src");

            return string.IsNullOrWhiteSpace(firstImageSrc) ? null : firstImageSrc;
        }

        /// <summary>
        /// Converts <paramref name="candidate"/> to an absolute http(s) <see cref="Uri"/>,
        /// resolving it against <paramref name="baseUri"/> when one is supplied.
        /// </summary>
        /// <returns>The URI, or <c>null</c> when it is missing, malformed or not http(s).</returns>
        private static Uri? ToHttpUri(string? candidate, Uri? baseUri = null)
        {
            if (string.IsNullOrWhiteSpace(candidate))
            {
                return null;
            }

            Uri? uri;
            var created = baseUri is null
                ? Uri.TryCreate(candidate, UriKind.Absolute, out uri)
                : Uri.TryCreate(baseUri, candidate, out uri);

            if (!created || uri is null)
            {
                return null;
            }

            // Rejects data:, file: and other schemes that must not be requested.
            var isHttp = uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;

            return isHttp ? uri : null;
        }

        /// <summary>
        /// Decodes the image in <paramref name="imageStream"/>, shrinks it when it is
        /// wider than <see cref="MaxAllowedWidth"/>, and re-encodes it as JPEG.
        /// </summary>
        /// <returns>The JPEG bytes as a Base64 string.</returns>
        private static async Task<string> ResizeImage(Stream imageStream)
        {
            using var image = await Image.LoadAsync(imageStream);

            if (image.Width > MaxAllowedWidth)
            {
                // Only the width is constrained; the height is left at 0.
                // NOTE(review): presumably ImageSharp derives the height from the
                // aspect ratio in this case - confirm against the ResizeOptions docs.
                image.Mutate(x => x.Resize(new ResizeOptions
                {
                    Size = new Size(MaxAllowedWidth, 0),
                    Mode = ResizeMode.Max
                }));
            }

            using var memoryStream = new MemoryStream();
            image.Save(memoryStream, new JpegEncoder());

            return Convert.ToBase64String(memoryStream.ToArray());
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.