Skip to content

Commit 07b360a

Browse files
Copilot, waldekmastykarz, garrytrinder
authored
Fix emoji encoding in recorded .har files (#1557)
* Initial plan

* Fix emoji encoding in recorded .har files

Add UnsafeRelaxedJsonEscaping to ProxyUtils.JsonSerializerOptions to preserve BMP non-ASCII characters as literal UTF-8 in JSON output. Add surrogate pair unescaping in HarGeneratorPlugin to convert \uXXXX\uYYYY escape sequences for non-BMP characters (such as emoji at U+1F600 and above) back to their literal UTF-8 representation.

Co-authored-by: waldekmastykarz <11164679+waldekmastykarz@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: waldekmastykarz <11164679+waldekmastykarz@users.noreply.github.com>
Co-authored-by: Waldek Mastykarz <waldek@mastykarz.nl>
Co-authored-by: Garry Trinder <garry@trinder365.co.uk>
1 parent 98de506 commit 07b360a

2 files changed

Lines changed: 18 additions & 0 deletions

File tree

DevProxy.Abstractions/Utils/ProxyUtils.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
using Newtonsoft.Json.Schema;
1010
using System.Collections.ObjectModel;
1111
using System.Reflection;
12+
using System.Text.Encodings.Web;
1213
using System.Text.Json;
1314
using System.Text.Json.Serialization;
1415
using System.Text.RegularExpressions;
@@ -44,6 +45,7 @@ public static class ProxyUtils
4445
public static JsonSerializerOptions JsonSerializerOptions { get; } = new()
4546
{
4647
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
48+
Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
4749
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
4850
PropertyNameCaseInsensitive = true,
4951
ReadCommentHandling = JsonCommentHandling.Skip,

DevProxy.Plugins/Generation/HarGeneratorPlugin.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
using Microsoft.Extensions.Configuration;
1010
using Microsoft.Extensions.Logging;
1111
using System.Diagnostics;
12+
using System.Globalization;
1213
using System.Text.Json;
14+
using System.Text.RegularExpressions;
1315
using System.Web;
1416

1517
namespace DevProxy.Plugins.Generation;
@@ -33,6 +35,8 @@ public sealed class HarGeneratorPlugin(
3335
proxyConfiguration,
3436
pluginConfigurationSection)
3537
{
38+
// Matches an escaped UTF-16 surrogate pair in text: "\u" plus a high surrogate
// (D800 to DBFF) immediately followed by "\u" plus a low surrogate (DC00 to DFFF).
// Hex digits are accepted in either case. Group 1 captures the four hex digits of
// the high surrogate, group 2 the four hex digits of the low surrogate.
private static readonly Regex surrogatePairRegex = new(@"\\u([dD][89aAbB][0-9a-fA-F]{2})\\u([dD][cCdDeEfF][0-9a-fA-F]{2})");
39+
3640
public override string Name => nameof(HarGeneratorPlugin);
3741

3842
public override async Task AfterRecordingStopAsync(RecordingArgs e, CancellationToken cancellationToken)
@@ -69,6 +73,7 @@ r.Context.Session is not null &&
6973

7074
Logger.LogDebug("Serializing HAR file...");
7175
var harFileJson = JsonSerializer.Serialize(harFile, ProxyUtils.JsonSerializerOptions);
76+
harFileJson = UnescapeSurrogatePairs(harFileJson);
7277
var fileName = $"devproxy-{DateTime.Now:yyyyMMddHHmmss}.har";
7378

7479
Logger.LogDebug("Writing HAR file to {FileName}...", fileName);
@@ -159,4 +164,15 @@ private HarEntry CreateHarEntry(RequestLog log)
159164

160165
return entry;
161166
}
167+
168+
/// <summary>
/// Replaces every escaped UTF-16 surrogate pair (<c>\uXXXX\uYYYY</c>) that
/// <c>surrogatePairRegex</c> finds in <paramref name="json"/> with the literal
/// character the pair encodes.
/// </summary>
/// <param name="json">JSON text that may contain escaped surrogate pairs.</param>
/// <returns>The text with each matched escape pair replaced by its character.</returns>
private static string UnescapeSurrogatePairs(string json)
{
    return surrogatePairRegex.Replace(json, DecodeSurrogatePair);

    // Converts one matched \uXXXX\uYYYY pair into the string for its code point.
    static string DecodeSurrogatePair(Match pair)
    {
        var leadUnit = ParseHex(pair.Groups[1].Value);
        var trailUnit = ParseHex(pair.Groups[2].Value);

        // UTF-16 decoding: each surrogate carries ten payload bits, and the
        // combined value is offset by 0x10000 to land above the BMP.
        var leadBits = (leadUnit - 0xD800) << 10;
        var trailBits = trailUnit - 0xDC00;

        return char.ConvertFromUtf32(0x10000 + leadBits + trailBits);
    }

    // Parses the captured hex digits, culture-independently.
    static int ParseHex(string digits) =>
        int.Parse(digits, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
}
162178
}

0 commit comments

Comments
 (0)