fix: fix iqiyi XML parse failure. close #100

This commit is contained in:
cxfksword
2025-11-19 11:08:46 +08:00
parent e331329edd
commit ab5ead5d62
2 changed files with 93 additions and 23 deletions

View File

@@ -96,11 +96,16 @@ namespace Jellyfin.Plugin.Danmu.Test
{ {
var vid = "132987200"; var vid = "132987200";
var result = await api.GetDanmuContentByMatAsync(vid, 1, CancellationToken.None); var result = await api.GetDanmuContentByMatAsync(vid, 1, CancellationToken.None);
Console.WriteLine(result); Console.WriteLine($"获取到 {result.Count} 条弹幕");
if (result.Count > 0)
{
Console.WriteLine($"第一条弹幕:{result[0].Content} (时间:{result[0].ShowTime}s)");
}
} }
catch (Exception ex) catch (Exception ex)
{ {
Console.WriteLine(ex.Message); Console.WriteLine($"错误:{ex.Message}");
Console.WriteLine($"堆栈:{ex.StackTrace}");
} }
}).GetAwaiter().GetResult(); }).GetAwaiter().GetResult();
} }
@@ -114,17 +119,29 @@ namespace Jellyfin.Plugin.Danmu.Test
{ {
try try
{ {
var vid = "132987200"; var vid = "2569036400194800";
var result = await api.GetDanmuContentAsync(vid, CancellationToken.None); var result = await api.GetDanmuContentAsync(vid, CancellationToken.None);
Console.WriteLine(result); Console.WriteLine($"获取到 {result.Count} 条弹幕");
if (result.Count > 0)
{
Console.WriteLine($"第一条弹幕:{result[0].Content} (时间:{result[0].ShowTime}s)");
}
} }
catch (Exception ex) catch (Exception ex)
{ {
Console.WriteLine(ex.Message); Console.WriteLine($"错误:{ex.Message}");
Console.WriteLine($"堆栈:{ex.StackTrace}");
} }
}).GetAwaiter().GetResult(); }).GetAwaiter().GetResult();
} }
[TestMethod]
public void TestRemoveInvalidXmlChars()
{
    // A "&#0;" numeric character reference must be dropped, while the legal
    // reference right next to it (&#128293;) has to survive untouched.
    var textWithNullReference = "<name>挽星&#0;&#128293;</name>";
    Assert.AreEqual("<name>挽星&#128293;</name>", IqiyiApi.RemoveInvalidXmlChars(textWithNullReference));

    // Raw vertical tab (U+000B) and form feed (U+000C) characters must be stripped.
    var textWithVtFf = "<name>挽\u000B星\u000C</name>";
    Assert.AreEqual("<name>挽星</name>", IqiyiApi.RemoveInvalidXmlChars(textWithVtFf));
}
} }
} }

View File

@@ -1,9 +1,11 @@
using System.IO;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net.Http.Json; using System.Net.Http.Json;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.IO;
using System.Threading; using System.Threading;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Web; using System.Web;
@@ -64,7 +66,7 @@ public class IqiyiApi : AbstractApi
keyword = HttpUtility.UrlEncode(keyword); keyword = HttpUtility.UrlEncode(keyword);
var url = $"https://search.video.iqiyi.com/o?if=html5&key={keyword}&pageNum=1&pageSize=20"; var url = $"https://search.video.iqiyi.com/o?if=html5&key={keyword}&pageNum=1&pageSize=20";
var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false); using var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); response.EnsureSuccessStatusCode();
var result = new List<IqiyiSearchAlbumInfo>(); var result = new List<IqiyiSearchAlbumInfo>();
@@ -147,7 +149,7 @@ public class IqiyiApi : AbstractApi
using (var request = new HttpRequestMessage(HttpMethod.Get, url)) using (var request = new HttpRequestMessage(HttpMethod.Get, url))
{ {
request.Headers.Add("user-agent", MOBILE_USER_AGENT); request.Headers.Add("user-agent", MOBILE_USER_AGENT);
var response = await this.httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); using var response = await this.httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); response.EnsureSuccessStatusCode();
var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false); var body = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
@@ -180,7 +182,7 @@ public class IqiyiApi : AbstractApi
} }
var url = $"https://pcw-api.iqiyi.com/albums/album/avlistinfo?aid={albumId}&page=1&size={size}"; var url = $"https://pcw-api.iqiyi.com/albums/album/avlistinfo?aid={albumId}&page=1&size={size}";
var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false); using var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); response.EnsureSuccessStatusCode();
var albumResult = await response.Content.ReadFromJsonAsync<IqiyiVideoResult>(_jsonOptions, cancellationToken).ConfigureAwait(false); var albumResult = await response.Content.ReadFromJsonAsync<IqiyiVideoResult>(_jsonOptions, cancellationToken).ConfigureAwait(false);
@@ -203,7 +205,7 @@ public class IqiyiApi : AbstractApi
} }
var url = $"https://pcw-api.iqiyi.com/album/album/baseinfo/{albumId}"; var url = $"https://pcw-api.iqiyi.com/album/album/baseinfo/{albumId}";
var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false); using var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); response.EnsureSuccessStatusCode();
var albumResult = await response.Content.ReadFromJsonAsync<IqiyiAlbumResult>(_jsonOptions, cancellationToken).ConfigureAwait(false); var albumResult = await response.Content.ReadFromJsonAsync<IqiyiAlbumResult>(_jsonOptions, cancellationToken).ConfigureAwait(false);
@@ -224,10 +226,10 @@ public class IqiyiApi : AbstractApi
var year = begin.Year; var year = begin.Year;
var month = begin.ToString("MM"); var month = begin.ToString("MM");
url = $"https://pub.m.iqiyi.com/h5/main/videoList/source/month/?sourceId={albumId}&year={year}&month={month}"; url = $"https://pub.m.iqiyi.com/h5/main/videoList/source/month/?sourceId={albumId}&year={year}&month={month}";
response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false); using var monthResponse = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); monthResponse.EnsureSuccessStatusCode();
var videoListResult = await response.Content.ReadFromJsonAsync<IqiyiVideoListResult>(_jsonOptions, cancellationToken).ConfigureAwait(false); var videoListResult = await monthResponse.Content.ReadFromJsonAsync<IqiyiVideoListResult>(_jsonOptions, cancellationToken).ConfigureAwait(false);
if (videoListResult != null && videoListResult.Data != null && videoListResult.Data.Videos != null && videoListResult.Data.Videos.Count > 0) if (videoListResult != null && videoListResult.Data != null && videoListResult.Data.Videos != null && videoListResult.Data.Videos.Count > 0)
{ {
list.AddRange(videoListResult.Data.Videos.Where(x => !x.ShortTitle.Contains("精编版") && !x.ShortTitle.Contains("会员版"))); list.AddRange(videoListResult.Data.Videos.Where(x => !x.ShortTitle.Contains("精编版") && !x.ShortTitle.Contains("会员版")));
@@ -271,6 +273,11 @@ public class IqiyiApi : AbstractApi
// 每段有300秒弹幕为避免弹幕太大从中间隔抽取最大60秒200条弹幕 // 每段有300秒弹幕为避免弹幕太大从中间隔抽取最大60秒200条弹幕
danmuList.AddRange(comments.ExtractToNumber(1000)); danmuList.AddRange(comments.ExtractToNumber(1000));
} }
catch (InvalidOperationException ex)
{
_logger.LogError("获取爱奇艺弹幕({0})出错:{1}", tvId, ex.Message);
break;
}
catch (Exception ex) catch (Exception ex)
{ {
break; break;
@@ -298,7 +305,7 @@ public class IqiyiApi : AbstractApi
var s2 = tvId.Substring(tvId.Length - 2); var s2 = tvId.Substring(tvId.Length - 2);
// 一次拿300秒的弹幕 // 一次拿300秒的弹幕
var url = $"http://cmts.iqiyi.com/bullet/{s1}/{s2}/{tvId}_300_{mat}.z"; var url = $"http://cmts.iqiyi.com/bullet/{s1}/{s2}/{tvId}_300_{mat}.z";
var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false); using var response = await httpClient.GetAsync(url, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode(); response.EnsureSuccessStatusCode();
using (var zipStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false)) using (var zipStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false))
@@ -317,19 +324,45 @@ public class IqiyiApi : AbstractApi
} }
memoryStream.Position = 0; memoryStream.Position = 0;
using (var reader = new StreamReader(memoryStream)) using (var reader = new StreamReader(memoryStream, leaveOpen: true))
{ {
var serializer = new XmlSerializer(typeof(IqiyiCommentDocument)); var serializer = new XmlSerializer(typeof(IqiyiCommentDocument));
var result = serializer.Deserialize(reader) as IqiyiCommentDocument; try
if (result != null && result.Data != null)
{ {
var comments = new List<IqiyiComment>(); var result = serializer.Deserialize(reader) as IqiyiCommentDocument;
foreach (var entry in result.Data) if (result != null && result.Data != null)
{ {
comments.AddRange(entry.List); var comments = new List<IqiyiComment>();
foreach (var entry in result.Data)
{
comments.AddRange(entry.List);
}
return comments;
}
}
catch (InvalidOperationException ex)
{
// 重置 MemoryStream 位置并创建新的 StreamReader
memoryStream.Position = 0;
using (var cleanReader = new StreamReader(memoryStream, leaveOpen: true))
{
var xmlContent = cleanReader.ReadToEnd();
var cleanXml = RemoveInvalidXmlChars(xmlContent);
using (var stringReader = new StringReader(cleanXml))
{
var result = serializer.Deserialize(stringReader) as IqiyiCommentDocument;
if (result != null && result.Data != null)
{
var comments = new List<IqiyiComment>();
foreach (var entry in result.Data)
{
comments.AddRange(entry.List);
}
return comments;
}
}
} }
return comments;
} }
} }
} }
@@ -339,10 +372,30 @@ public class IqiyiApi : AbstractApi
return new List<IqiyiComment>(); return new List<IqiyiComment>();
} }
/// <summary>
/// Matches everything <see cref="RemoveInvalidXmlChars"/> has to inspect: the raw characters that are
/// always removed, and any numeric character reference (decimal or hexadecimal) so that its
/// code point can be validated. Built once because the pattern never changes.
/// </summary>
private static readonly Regex _invalidXmlCharRegex = new Regex(
    @"[\u0000-\u0008\u000B\u000C\u000E-\u001F\u200B-\u200D\uFEFF\uFFFE\uFFFF]|&#(?:x(?<hex>[0-9A-Fa-f]+)|(?<dec>[0-9]+));",
    RegexOptions.Compiled | RegexOptions.CultureInvariant);

/// <summary>
/// Removes content from an XML string that would make an XML parser reject it.
/// </summary>
/// <remarks>
/// The following is removed:
/// <list type="bullet">
/// <item><description>Raw control characters U+0000-U+0008, U+000B, U+000C and U+000E-U+001F.</description></item>
/// <item><description>Raw U+FFFE and U+FFFF, which are outside the XML 1.0 character range.</description></item>
/// <item><description>Raw zero-width characters U+200B-U+200D and U+FEFF (byte order mark).</description></item>
/// <item><description>Numeric character references such as <c>&amp;#0;</c>, <c>&amp;#x0;</c> or <c>&amp;#11;</c>
/// whose code point is not a legal XML 1.0 character.</description></item>
/// </list>
/// References to legal characters (for example <c>&amp;#128293;</c>) are left as they are.
/// </remarks>
/// <param name="xml">The XML string to clean; may be <c>null</c> or empty.</param>
/// <returns>The cleaned XML string, or <paramref name="xml"/> itself when it is <c>null</c> or empty.</returns>
public static string RemoveInvalidXmlChars(string xml)
{
    if (string.IsNullOrEmpty(xml))
    {
        return xml;
    }

    return _invalidXmlCharRegex.Replace(xml, match =>
    {
        var hex = match.Groups["hex"];
        var dec = match.Groups["dec"];
        if (!hex.Success && !dec.Success)
        {
            // The character-class alternative matched: a raw character that is always dropped.
            return string.Empty;
        }

        var digits = hex.Success ? hex.Value : dec.Value;
        var style = hex.Success ? NumberStyles.AllowHexSpecifier : NumberStyles.None;

        // A value too large to parse cannot be a legal code point either, so it is dropped as well.
        var parsed = int.TryParse(digits, style, CultureInfo.InvariantCulture, out var codePoint);
        return parsed && IsLegalXmlCodePoint(codePoint) ? match.Value : string.Empty;
    });
}

/// <summary>
/// Tells whether a code point belongs to the XML 1.0 <c>Char</c> production.
/// </summary>
/// <param name="codePoint">The Unicode code point to check.</param>
/// <returns><c>true</c> when the code point may appear in an XML 1.0 document.</returns>
private static bool IsLegalXmlCodePoint(int codePoint)
{
    return codePoint == 0x9
        || codePoint == 0xA
        || codePoint == 0xD
        || (codePoint >= 0x20 && codePoint <= 0xD7FF)
        || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
        || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
}
protected async Task LimitRequestFrequently() protected async Task LimitRequestFrequently()
{ {
await this._timeConstraint; await this._timeConstraint;
} }
} }