ttrss-co-pilot/tasks/Sponsorblock.cs
Adam R Grey 3958eee080 filter out distasteful characters. namely apostrophe.
not an illegal character in a file path, which yt-dlp would handle for me, but given how I send it off to ffmpeg I trip over this sometimes
2023-04-10 14:43:27 -04:00

128 lines
6.9 KiB
C#

using System;
using System.Text;
using Newtonsoft.Json;
using System.Linq;
using System.Diagnostics;
using System.Text.RegularExpressions;
using ttrss_co_client.ttrss;
using ttrss_co_client.ttrss.datastructures;
namespace ttrss_co_client.tasks
{
///<summary>Move to output</summary>
public class Sponsorblock : Phase2Task
{
public override string TaskName => "sponsorblock";
public override async Task<Tuple<TaskStatus, WorkOrder>>ActOn(WorkOrder workOrder)
{
var sw = new Stopwatch();
sw.Start();
var article = (await TtrssClient.GetArticles(workOrder.articleId)).FirstOrDefault();
if(article == null)
{
Console.Error.WriteLine($"couldn't find article {workOrder.articleId} for workOrder {workOrder.guid}?!");
return new Tuple<TaskStatus, WorkOrder>(TaskStatus.Done, null);
}
if (!article.link.Host.EndsWith("youtube.com"))
{
Console.WriteLine("no sponsorblock segments on sites other than YT");
return new Tuple<TaskStatus, WorkOrder>(TaskStatus.ContinueNow, workOrder);
}
var match = Regex.Match(article.link.Query, "v=([^&]+)(&|$)");
var videoId = match.Groups?[1].Value;
var c = new HttpClient();
var sponsorblockcheck = await c.GetAsync($"https://sponsor.ajay.app/api/skipSegments?videoID={videoId}&category=sponsor&category=selfpromo&category=interaciton&category=intro&category=outro&category=preview&category=interaction");
IEnumerable<sponsorblock.Segment> segments = null;
try
{
segments = JsonConvert.DeserializeObject<IEnumerable<sponsorblock.Segment>>(await sponsorblockcheck.Content.ReadAsStringAsync());
}
catch (Exception e)
{
segments = null;
}
if (sponsorblockcheck.StatusCode == System.Net.HttpStatusCode.NotFound || segments == null || segments.Count() == 0)
{
Console.WriteLine($"sponsorblock reports that {videoId} has no entries (yet)");
var updateTimestamp = new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc);
updateTimestamp = updateTimestamp.AddSeconds(article.updated).ToLocalTime();
if (DateTime.Now - updateTimestamp > TimeSpan.FromMinutes(45))
{
await TtrssClient.UpdateArticleNote($"{article.note}\n[{DateTime.Now.ToString("o")}] updated {updateTimestamp} (more than 45 minutes ago), going to give up waiting for sponsorblock", article.id);
return new Tuple<TaskStatus, WorkOrder>(TaskStatus.ContinueNow, workOrder);
}
else
{
await TtrssClient.UpdateArticleNote($"{article.note}\n[{DateTime.Now.ToString("o")}] waiting for sponsorblock segments", article.id);
workOrder.Phase2TaskList[workOrder.Phase2TaskList.Keys.Min() - 1] = this.TaskName;
return new Tuple<TaskStatus, WorkOrder>(TaskStatus.TryLater, workOrder);
}
}
else
{
Console.WriteLine($"[{DateTime.Now.ToString("o")}] sponsorblock reports {segments.Count()} junk segments");
var contentSegments = new List<Tuple<double, double>>();
var previousEdge = 0.0;
var extension = workOrder.data["path"].Substring(workOrder.data["path"].LastIndexOf('.'));
var intermediatePathBase = workOrder.data["path"].Substring(0, workOrder.data["path"].LastIndexOf('.'));
var conversionProcesses = new List<Tuple<Process, Tuple<int, string>>>();
foreach(var junkSegment in segments.OrderBy(s => s.segment[0]))
{
contentSegments.Add(new Tuple<double, double>(previousEdge, junkSegment.segment[0]));
previousEdge = junkSegment.segment[1];
}
contentSegments.Add(new Tuple<double, double>(previousEdge, segments.First().videoDuration));
contentSegments = contentSegments.Except(contentSegments.Where(tup => tup.Item2 - tup.Item1 < 0.5)).ToList();
#region ffmpeg via intermediate files
var intermediateCount = 0;
foreach(var seg in contentSegments)
{
var intermediateTargetPath = $"{intermediatePathBase}-intermediate-{intermediateCount.ToString("D2")}{extension}";
conversionProcesses.Add(new Tuple<Process, Tuple<int, string>>(
Process.Start("ffmpeg", $"-y -loglevel quiet -i \"{workOrder.data["path"]}\" -ss {seg.Item1} -to {seg.Item2} \"{intermediateTargetPath}\""),
new Tuple<int, string>(intermediateCount, intermediateTargetPath)
));
Console.WriteLine("waiting for exit from task, for debugging reasons");
conversionProcesses.Last().Item1.WaitForExit();
intermediateCount++;
}
Console.WriteLine($"[{DateTime.Now.ToString("o")}] intermediate content segments being exported");
var intermediates = new Dictionary<int, string>();
foreach(var proc in conversionProcesses)
{
proc.Item1.WaitForExit();
intermediates[proc.Item2.Item1] = proc.Item2.Item2;
}
Console.WriteLine($"[{DateTime.Now.ToString("o")}] intermediate content segments should be exported, stitching together");
var sb = new StringBuilder();
sb.AppendLine("#ffmpeg demands it");
foreach(var intermediate in intermediates.OrderBy(kvp => kvp.Key))
{
sb.AppendLine($"file '{intermediate.Value}'");
}
var ffmpegFile = Path.Combine(Path.GetDirectoryName(workOrder.data["path"]), "ffmpeglist.txt");
File.WriteAllText(ffmpegFile, sb.ToString());
Process.Start("ffmpeg", $"-y -f concat -safe 0 -i {ffmpegFile} -c copy \"{workOrder.data["path"]}\"")
.WaitForExit();
File.Delete(ffmpegFile);
Console.WriteLine($"[{DateTime.Now.ToString("o")}] intermediate content segments stitched. Deleting originals.");
foreach(var intermediate in intermediates.Values)
{
File.Delete(intermediate);
}
#endregion
sw.Stop();
await TtrssClient.UpdateArticleNote($"{article.note}\n[{DateTime.Now.ToString("o")}] removed {segments.Count()} junk segments", article.id);
return new Tuple<TaskStatus, WorkOrder>(TaskStatus.ContinueNow, workOrder);
}
}
}
}