perf: cache regex patterns in Parser.ToUrlSlug and FileNameBuilder.GetEditionToken (#82)

Co-authored-by: admin <admin@ardentleatherworks.com>
This commit is contained in:
Cody Kickertz 2025-12-19 19:35:29 -06:00 committed by GitHub
parent f2fff6419d
commit 934a18e9a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 22 additions and 5 deletions

View file

@ -56,6 +56,10 @@ public class FileNameBuilder : IBuildFileNames
private static readonly Regex ReservedDeviceNamesRegex = new Regex(@"^(?:aux|com[1-9]|con|lpt[1-9]|nul|prn)\.", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Edition token regex patterns. Held in static fields so each pattern is constructed once
// and reused by GetEditionToken, which applies them after title-casing the edition text.

// Matches an ordinal (1-3 digits followed by st/th/rd/nd, case-insensitive) delimited on each
// side by a word boundary or underscore. GetEditionToken lower-cases the captured text.
private static readonly Regex EditionOrdinalRegex = new Regex(@"((?:\b|_)\d{1,3}(?:st|th|rd|nd)(?:\b|_))", RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Matches the tokens IMAX, 3D, SDR, HDR and DV (case-insensitive) delimited on each side by a
// word boundary or underscore. GetEditionToken upper-cases the captured text.
private static readonly Regex EditionUppercaseRegex = new Regex(@"((?:\b|_)(?:IMAX|3D|SDR|HDR|DV)(?:\b|_))", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// generated from https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
public static readonly ImmutableDictionary<string, string> Iso639BTMap = new Dictionary<string, string>
{
@ -533,8 +537,8 @@ private static string GetEditionToken(MovieFile movieFile)
{
var edition = CultureInfo.CurrentCulture.TextInfo.ToTitleCase(movieFile.Edition.ToLowerInvariant());
edition = Regex.Replace(edition, @"((?:\b|_)\d{1,3}(?:st|th|rd|nd)(?:\b|_))", match => match.Groups[1].Value.ToLowerInvariant(), RegexOptions.IgnoreCase);
edition = Regex.Replace(edition, @"((?:\b|_)(?:IMAX|3D|SDR|HDR|DV)(?:\b|_))", match => match.Groups[1].Value.ToUpperInvariant(), RegexOptions.IgnoreCase);
edition = EditionOrdinalRegex.Replace(edition, match => match.Groups[1].Value.ToLowerInvariant());
edition = EditionUppercaseRegex.Replace(edition, match => match.Groups[1].Value.ToUpperInvariant());
return edition;
}

View file

@ -137,6 +137,11 @@ public static class Parser
private static readonly Regex RequestInfoRegex = new Regex(@"^(?:\[.+?\])+", RegexOptions.Compiled);
// ToUrlSlug regex patterns. Held in static fields so each pattern is constructed once and
// reused across ToUrlSlug calls.

// Matches a single whitespace character; ToUrlSlug replaces each match with "-".
private static readonly Regex SlugSpaceRegex = new Regex(@"\s", RegexOptions.Compiled);

// Matches characters that are not permitted in a slug; ToUrlSlug replaces each match with "-"
// or with an empty string, depending on its invalidDashReplacement argument.
private static readonly Regex SlugInvalidCharsRegex = new Regex(@"[^a-z0-9\s-_]", RegexOptions.Compiled);

// Matches a run of two or more '-' / '_' characters. ToUrlSlug uses it only when
// deduplicateChars is exactly "-_"; any other value builds its pattern at call time.
private static readonly Regex SlugDuplicateDefaultRegex = new Regex(@"([-_]){2,}", RegexOptions.Compiled);
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
private static readonly Regex MultiRegex = new (@"[_. ](?<multi>multi)[_. ]", RegexOptions.Compiled | RegexOptions.IgnoreCase);
@ -405,13 +410,13 @@ public static string ToUrlSlug(string value, bool invalidDashReplacement = false
value = value.RemoveAccent();
// Replace spaces
value = Regex.Replace(value, @"\s", "-", RegexOptions.Compiled);
value = SlugSpaceRegex.Replace(value, "-");
// Should invalid characters be replaced with dash or empty string?
var replaceCharacter = invalidDashReplacement ? "-" : string.Empty;
// Remove invalid chars
value = Regex.Replace(value, @"[^a-z0-9\s-_]", replaceCharacter, RegexOptions.Compiled);
value = SlugInvalidCharsRegex.Replace(value, replaceCharacter);
// Trim dashes or underscores from end, or user defined character set
if (!string.IsNullOrEmpty(trimEndChars))
@ -422,7 +427,15 @@ public static string ToUrlSlug(string value, bool invalidDashReplacement = false
// Replace double occurrences of - or _, or user defined character set
if (!string.IsNullOrEmpty(deduplicateChars))
{
value = Regex.Replace(value, @"([" + deduplicateChars + "]){2,}", "$1", RegexOptions.Compiled);
// Use cached regex for default pattern, otherwise create dynamic pattern
if (deduplicateChars == "-_")
{
value = SlugDuplicateDefaultRegex.Replace(value, "$1");
}
else
{
value = Regex.Replace(value, @"([" + deduplicateChars + "]){2,}", "$1", RegexOptions.Compiled);
}
}
return value;