Add CRT-Lottes shader via FFmpeg program_opencl

Implements Timothy Lottes' CRT shader (port of mpv-retro-shaders) as an
optional server-side post-processing filter activated per playback session
via streamOptions[crtShader]=true.

Server-side changes:
- Add crt_lottes.cl OpenCL kernel (scanlines, bloom, curvature, shadow mask
  variants 0-4, sRGB linearisation) deployed alongside the server binary
- Add IsCrtShaderEnabled / GetCrtShaderOclFilters / GetCrtShaderFilter
  helpers to EncodingHelper
- SW pipeline: format=rgba → hwupload → program_opencl → hwdownload
- Intel VAAPI + OCL tonemap: inline scale_opencl round-trip (zero PCIe)
- Intel VAAPI, VAAPI encoder, no tonemap: hwmap→opencl→CRT→hwmap (zero PCIe)
- Intel VAAPI, SW encoder, no tonemap: hwmap→opencl→CRT→hwdownload (1× PCIe)
- AMD VAAPI + VK tonemap: hwmap→opencl→CRT→hwmap after scale_vaapi (zero PCIe)
- AMD VAAPI, SW encoder: hwmap→opencl→CRT→hwdownload (1× PCIe)

Shadow mask variant is configurable via streamOptions[crtShadowMask]=0..4.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mani
2026-02-24 14:19:31 +01:00
parent 0bc8b92b6a
commit 6fbc0f87df
3 changed files with 436 additions and 2 deletions

View File

@@ -84,6 +84,9 @@
<None Update="ServerSetupApp/index.mstemplate.html">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Resources/Shaders/crt_lottes.cl">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,307 @@
// CRT Lottes shader OpenCL implementation for FFmpeg program_opencl
// Port of Timothy Lottes' CRT shader (public domain).
// Adapted from the mpv-retro-shaders GLSL version.
//
// Copyright (c) 2022, The mpv-retro-shaders Contributors
// Copyright (c) 2024, Jellyfin Contributors
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
// Parameters (override via build_opts, e.g. -DSHADOW_MASK=2)
// Every default below is guarded by #ifndef so a -D option passed when the
// kernel is built replaces it without editing this file.
// Scale handed to gauss1d() for the main scanline weights (see scan_weight);
// it multiplies |distance|^SHAPE inside exp2(), so more negative = faster falloff.
#ifndef HARD_SCAN
#define HARD_SCAN (-8.0f)
#endif
// Curvature factor for the x coordinate: x is scaled by 1 + y*y*CURVATURE_X
// in bend_screen() (coordinates centred on the screen, range -1..1).
#ifndef CURVATURE_X
#define CURVATURE_X (0.031f)
#endif
// Curvature factor for the y coordinate: y is scaled by 1 + x*x*CURVATURE_Y.
#ifndef CURVATURE_Y
#define CURVATURE_Y (0.041f)
#endif
// Value every mask channel starts at in apply_mask().
#ifndef MASK_DARK
#define MASK_DARK (0.5f)
#endif
// Value assigned to the one channel apply_mask() selects for a pixel.
#ifndef MASK_LIGHT
#define MASK_LIGHT (1.5f)
#endif
// Shadow mask variant: 1..4 select a branch in apply_mask(); 0 makes the
// kernel skip the mask multiplication entirely.
#ifndef SHADOW_MASK
#define SHADOW_MASK 2
#endif
// Multiplier applied to every fetched sample in fetch_sample(), before
// the sample is linearised.
#ifndef BRIGHTNESS_BOOST
#define BRIGHTNESS_BOOST (1.0f)
#endif
// Scale handed to gauss1d() for the bloom scanline weights (see bloom_scan_weight).
#ifndef HARD_BLOOM_SCAN
#define HARD_BLOOM_SCAN (-2.0f)
#endif
// Factor the bloom() result is multiplied by before it is added to the main colour.
#ifndef BLOOM_AMOUNT
#define BLOOM_AMOUNT (1.0f / 16.0f)
#endif
// Exponent applied to |distance| in gauss1d(); 2.0 gives a Gaussian profile.
#ifndef SHAPE
#define SHAPE (2.0f)
#endif
// sRGB linear
// Decodes gamma-encoded RGB into linear light.
// Negative components are clamped to zero first, then each channel is mapped
// through gain * (c + offset)^2.4.
static float3 linearize_rgb(float3 c)
{
    const float offset = 0.05958483740687370300f;
    const float gain   = 0.87031054496765136718f;

    const float3 clamped = max(c, 0.0f);
    const float3 shifted = clamped + (float3)(offset);
    return gain * pow(shifted, (float3)(2.4f));
}
// Encodes linear-light RGB back to the gamma domain.
// Negative components are clamped to zero first, then each channel is mapped
// through (gain * c)^(1/2.4) - offset, so an input of 0 yields -offset.
static float3 delinearize_rgb(float3 c)
{
    const float offset = 0.05958483740687370300f;
    const float gain   = 1.14901518821716308593f;

    const float3 clamped = max(c, 0.0f);
    const float3 encoded = pow(gain * clamped, (float3)(1.0f / 2.4f));
    return encoded - (float3)(offset);
}
// Texture helper
// Reads one RGB sample from src.
//   pos        - normalised (0..1) position in source space
//   off_texels - offset from pos, expressed in texels
//   src_size   - source dimensions in texels, used to normalise the offset
// The RGB value is multiplied by BRIGHTNESS_BOOST; alpha is discarded.
static float3 fetch_sample(
    __read_only image2d_t src,
    sampler_t smp,
    float2 pos,
    float2 off_texels,
    float2 src_size)
{
    const float2 uv = pos + off_texels / src_size;
    const float4 texel = read_imagef(src, smp, uv);
    return BRIGHTNESS_BOOST * texel.xyz;
}
// Fetches a sample via fetch_sample() and converts it to linear light.
// Parameters are forwarded unchanged; see fetch_sample() for their meaning.
static float3 nearest_emulated_sample(
    __read_only image2d_t src,
    sampler_t smp,
    float2 pos,
    float2 off_texels,
    float2 src_size)
{
    const float3 encoded = fetch_sample(src, smp, pos, off_texels, src_size);
    return linearize_rgb(encoded);
}
// Gaussian kernel
// One-dimensional falloff: 2^(scale * |pos|^SHAPE).
// Callers in this file pass a negative scale, so the result is 1 at pos == 0
// and decays as |pos| grows.
static float gauss1d(float pos, float scale)
{
    const float magnitude = fabs(pos);
    const float shaped    = pow(magnitude, SHAPE);
    return exp2(scale * shaped);
}
// Signed distance, in texel units, from pos to the centre of the texel that
// contains it. Each component lies in (-0.5, +0.5]: 0 when pos sits exactly on
// a texel centre, positive when the centre lies ahead of pos.
//   pos      - normalised (0..1) position in source space
//   src_size - source dimensions in texels
// Adding an integer tap offset to the result gives the distance from pos to
// the centre of that neighbouring texel, which is what the horz*/scan weight
// functions feed into gauss1d().
static float2 distance_to_texel(float2 pos, float2 src_size)
{
    const float2 texel_pos = pos * src_size;
    // OpenCL C only provides fract(x, *iptr); the one-argument GLSL form does
    // not exist, so the fractional part is computed explicitly.
    const float2 frac_part = texel_pos - floor(texel_pos);
    return (float2)(0.5f) - frac_part;
}
// Horizontal reconstruction (3 / 5 / 7 tap)
// 3-tap horizontal reconstruction on the row off_y texels away from pos.
// Taps at x offsets -1, 0, +1 are weighted by gauss1d() of their distance
// (distance_to_texel().x plus the tap offset) and the result is normalised
// by the sum of the weights.
static float3 horz3(
    __read_only image2d_t src, sampler_t smp,
    float2 pos, float off_y, float scale, float2 src_size)
{
    // Fetch the taps first, left to right.
    float3 taps[3];
    for (int i = 0; i < 3; ++i) {
        const float off_x = (float)(i - 1);
        taps[i] = nearest_emulated_sample(src, smp, pos, (float2)(off_x, off_y), src_size);
    }

    const float dx = distance_to_texel(pos, src_size).x;

    // Accumulate in the same left-to-right order as the taps.
    float3 weighted_sum = (float3)(0.0f);
    float  weight_total = 0.0f;
    for (int i = 0; i < 3; ++i) {
        const float w = gauss1d(dx + (float)(i - 1), scale);
        weighted_sum += taps[i] * w;
        weight_total += w;
    }
    return weighted_sum / weight_total;
}
// 5-tap horizontal reconstruction on the row off_y texels away from pos.
// Taps at x offsets -2..+2 are weighted by gauss1d() of their distance
// (distance_to_texel().x plus the tap offset) and the result is normalised
// by the sum of the weights.
static float3 horz5(
    __read_only image2d_t src, sampler_t smp,
    float2 pos, float off_y, float scale, float2 src_size)
{
    // Fetch the taps first, left to right.
    float3 taps[5];
    for (int i = 0; i < 5; ++i) {
        const float off_x = (float)(i - 2);
        taps[i] = nearest_emulated_sample(src, smp, pos, (float2)(off_x, off_y), src_size);
    }

    const float dx = distance_to_texel(pos, src_size).x;

    // Accumulate in the same left-to-right order as the taps.
    float3 weighted_sum = (float3)(0.0f);
    float  weight_total = 0.0f;
    for (int i = 0; i < 5; ++i) {
        const float w = gauss1d(dx + (float)(i - 2), scale);
        weighted_sum += taps[i] * w;
        weight_total += w;
    }
    return weighted_sum / weight_total;
}
// 7-tap horizontal reconstruction on the row off_y texels away from pos.
// Taps at x offsets -3..+3 are weighted by gauss1d() of their distance
// (distance_to_texel().x plus the tap offset) and the result is normalised
// by the sum of the weights.
static float3 horz7(
    __read_only image2d_t src, sampler_t smp,
    float2 pos, float off_y, float scale, float2 src_size)
{
    // Fetch the taps first, left to right.
    float3 taps[7];
    for (int i = 0; i < 7; ++i) {
        const float off_x = (float)(i - 3);
        taps[i] = nearest_emulated_sample(src, smp, pos, (float2)(off_x, off_y), src_size);
    }

    const float dx = distance_to_texel(pos, src_size).x;

    // Accumulate in the same left-to-right order as the taps.
    float3 weighted_sum = (float3)(0.0f);
    float  weight_total = 0.0f;
    for (int i = 0; i < 7; ++i) {
        const float w = gauss1d(dx + (float)(i - 3), scale);
        weighted_sum += taps[i] * w;
        weight_total += w;
    }
    return weighted_sum / weight_total;
}
// Screen curvature
// Warps a normalised (0..1) position to imitate a curved screen.
// The position is re-centred to -1..1, each axis is stretched by a factor that
// grows with the square of the other axis (CURVATURE_X / CURVATURE_Y), and the
// result is mapped back to 0..1. Positions near the edges can land outside 0..1.
static float2 bend_screen(float2 pos)
{
    const float2 centred = pos * 2.0f - 1.0f;
    const float2 stretch = (float2)(1.0f + (centred.y * centred.y) * CURVATURE_X,
                                    1.0f + (centred.x * centred.x) * CURVATURE_Y);
    const float2 warped = centred * stretch;
    return warped * 0.5f + 0.5f;
}
// Scanline weights
// Vertical weight of the scanline `off` rows away from pos, using the
// HARD_SCAN falloff.
static float scan_weight(float2 pos, float off, float2 src_size)
{
    const float2 to_centre = distance_to_texel(pos, src_size);
    const float row_distance = to_centre.y + off;
    return gauss1d(row_distance, HARD_SCAN);
}
// Vertical weight of the scanline `off` rows away from pos for the bloom
// pass, using the HARD_BLOOM_SCAN falloff.
static float bloom_scan_weight(float2 pos, float off, float2 src_size)
{
    const float2 to_centre = distance_to_texel(pos, src_size);
    const float row_distance = to_centre.y + off;
    return gauss1d(row_distance, HARD_BLOOM_SCAN);
}
// Main CRT reconstruction
// Main scanline reconstruction: blends the rows at y offsets -1, 0 and +1.
// The outer rows use the 3-tap filter and the centre row the 5-tap filter,
// all with a horizontal scale of -10. Rows are combined with scan_weight();
// the weights are not normalised.
static float3 tri(
    __read_only image2d_t src, sampler_t smp,
    float2 pos, float2 src_size)
{
    const float h_scale = -10.0f;

    const float3 row_m1 = horz3(src, smp, pos, -1.0f, h_scale, src_size);
    const float3 row_0  = horz5(src, smp, pos,  0.0f, h_scale, src_size);
    const float3 row_p1 = horz3(src, smp, pos,  1.0f, h_scale, src_size);

    const float w_m1 = scan_weight(pos, -1.0f, src_size);
    const float w_0  = scan_weight(pos,  0.0f, src_size);
    const float w_p1 = scan_weight(pos,  1.0f, src_size);

    return row_m1 * w_m1 + row_0 * w_0 + row_p1 * w_p1;
}
// Bloom reconstruction: blends the rows at y offsets -2..+2.
// The two outermost rows use the 5-tap filter with scale -3, the three inner
// rows the 7-tap filter with scale -1.5. Rows are combined with
// bloom_scan_weight(); the weights are not normalised.
static float3 bloom(
    __read_only image2d_t src, sampler_t smp,
    float2 pos, float2 src_size)
{
    const float outer_scale = -3.0f;
    const float inner_scale = -1.5f;

    const float3 row_m2 = horz5(src, smp, pos, -2.0f, outer_scale, src_size);
    const float3 row_m1 = horz7(src, smp, pos, -1.0f, inner_scale, src_size);
    const float3 row_0  = horz7(src, smp, pos,  0.0f, inner_scale, src_size);
    const float3 row_p1 = horz7(src, smp, pos,  1.0f, inner_scale, src_size);
    const float3 row_p2 = horz5(src, smp, pos,  2.0f, outer_scale, src_size);

    const float w_m2 = bloom_scan_weight(pos, -2.0f, src_size);
    const float w_m1 = bloom_scan_weight(pos, -1.0f, src_size);
    const float w_0  = bloom_scan_weight(pos,  0.0f, src_size);
    const float w_p1 = bloom_scan_weight(pos,  1.0f, src_size);
    const float w_p2 = bloom_scan_weight(pos,  2.0f, src_size);

    return row_m2 * w_m2 + row_m1 * w_m1 + row_0 * w_0 + row_p1 * w_p1 + row_p2 * w_p2;
}
// Shadow mask
// Fractional part of x (x - floor(x)). OpenCL C only provides the two-argument
// fract(x, *iptr); the one-argument GLSL form does not exist and would fail to
// compile, so the mask code uses this helper instead.
static float crt_mask_frac(float x)
{
    return x - floor(x);
}

// Returns the per-channel shadow-mask multiplier for the output pixel at px
// (pixel coordinates; the kernel passes pixel centres, i.e. integer + 0.5).
// Every channel starts at MASK_DARK and, depending on the horizontal phase,
// exactly one channel is raised to MASK_LIGHT. The pattern is selected at
// build time by SHADOW_MASK:
//   1 - RGB triads modulated by alternating dark/light rows, with the row
//       phase shifted for every other group of three columns
//   2 - vertical RGB stripes with a period of 3 pixels
//   3 - stripes with a period of 6 pixels, shifted by 3 pixels per row
//   4 - like 3, but rows are paired (y is halved and floored) before the shift
// With any other value the mask is a uniform MASK_DARK.
static float3 apply_mask(float2 px)
{
    float3 m = (float3)(MASK_DARK);
#if SHADOW_MASK == 1
    float line = MASK_LIGHT;
    float odd = (crt_mask_frac(px.x / 6.0f) < 0.5f) ? 1.0f : 0.0f;
    if (crt_mask_frac((px.y + odd) / 2.0f) < 0.5f) line = MASK_DARK;
    float mx = crt_mask_frac(px.x / 3.0f);
    if (mx < 1.0f / 3.0f) m.x = MASK_LIGHT;
    else if (mx < 2.0f / 3.0f) m.y = MASK_LIGHT;
    else m.z = MASK_LIGHT;
    m *= line;
#elif SHADOW_MASK == 2
    float mx2 = crt_mask_frac(px.x / 3.0f);
    if (mx2 < 1.0f / 3.0f) m.x = MASK_LIGHT;
    else if (mx2 < 2.0f / 3.0f) m.y = MASK_LIGHT;
    else m.z = MASK_LIGHT;
#elif SHADOW_MASK == 3
    px.x += px.y * 3.0f;
    float mx3 = crt_mask_frac(px.x / 6.0f);
    if (mx3 < 1.0f / 3.0f) m.x = MASK_LIGHT;
    else if (mx3 < 2.0f / 3.0f) m.y = MASK_LIGHT;
    else m.z = MASK_LIGHT;
#elif SHADOW_MASK == 4
    px = floor(px * (float2)(1.0f, 0.5f));
    px.x += px.y * 3.0f;
    float mx4 = crt_mask_frac(px.x / 6.0f);
    if (mx4 < 1.0f / 3.0f) m.x = MASK_LIGHT;
    else if (mx4 < 2.0f / 3.0f) m.y = MASK_LIGHT;
    else m.z = MASK_LIGHT;
#endif
    return m;
}
// Entry point
// Kernel entry point: one work-item per output pixel.
//   src - input frame, read through a normalised, linear, clamp-to-edge sampler
//   dst - output frame; alpha is always written as 1
// For each pixel the normalised position is bent by bend_screen(), the colour
// is reconstructed with tri() plus BLOOM_AMOUNT * bloom(), multiplied by the
// shadow mask (unless SHADOW_MASK is 0) and converted back from linear light.
// Pixels whose bent position falls outside 0..1 are written as black.
__kernel void crt_lottes(
    __read_only image2d_t src,
    __write_only image2d_t dst)
{
    const int2 coord = (int2)(get_global_id(0), get_global_id(1));
    const int dst_w = get_image_width(dst);
    const int dst_h = get_image_height(dst);
    // The global work size may be rounded up past the image; ignore the excess.
    if (coord.x >= dst_w || coord.y >= dst_h)
        return;

    const int src_w = get_image_width(src);
    const int src_h = get_image_height(src);
    const float2 dst_size = (float2)(dst_w, dst_h);
    const float2 src_size = (float2)(src_w, src_h);

    // Linear (normalised) position of the pixel centre in output space.
    const float2 out_pos = ((float2)(coord.x, coord.y) + 0.5f) / dst_size;

    // Sampler: normalised coords + linear filter + clamp-to-edge.
    const sampler_t smp =
        CLK_NORMALIZED_COORDS_TRUE |
        CLK_ADDRESS_CLAMP_TO_EDGE |
        CLK_FILTER_LINEAR;

    // Output and source share the same normalised space, so no remapping is
    // needed even if the two images differ in size.
    const float2 src_pos = out_pos;

    // Apply CRT barrel curvature.
    const float2 bent = bend_screen(src_pos);

    // Black outside the curved screen border. Checked before any sampling so
    // border pixels skip the texture fetches whose result would be discarded.
    // The test is written as a negated "inside" check so a NaN coordinate is
    // still treated as outside.
    if (!(bent.x >= 0.0f && bent.x <= 1.0f &&
          bent.y >= 0.0f && bent.y <= 1.0f)) {
        write_imagef(dst, coord, (float4)(0.0f, 0.0f, 0.0f, 1.0f));
        return;
    }

    // Main scanline reconstruction + bloom (both in linear light).
    float3 color = tri(src, smp, bent, src_size);
    color += bloom(src, smp, bent, src_size) * BLOOM_AMOUNT;

    // Shadow mask, indexed by the output pixel centre.
#if SHADOW_MASK != 0
    const float2 px = floor(out_pos * dst_size) + 0.5f;
    color *= apply_mask(px);
#endif

    write_imagef(dst, coord, (float4)(delinearize_rgb(color), 1.0f));
}

View File

@@ -3708,6 +3708,76 @@ namespace MediaBrowser.Controller.MediaEncoding
};
}
/// <summary>
/// Determines whether the CRT-Lottes shader should be applied to an encoding job.
/// </summary>
/// <param name="state">The encoding job whose stream options are inspected.</param>
/// <returns>
/// <c>true</c> when the <c>crtShader</c> stream option equals "true" (case-insensitive)
/// and the <c>crt_lottes.cl</c> kernel source exists under <c>Resources/Shaders</c>
/// in the application base directory; otherwise <c>false</c>.
/// </returns>
public bool IsCrtShaderEnabled(EncodingJobInfo state)
{
    if (!state.BaseRequest.StreamOptions.TryGetValue("crtShader", out var requested))
    {
        return false;
    }

    if (!string.Equals(requested, "true", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // Only touch the file system once the option is known to be set.
    var kernelPath = Path.Combine(AppContext.BaseDirectory, "Resources", "Shaders", "crt_lottes.cl");
    return File.Exists(kernelPath);
}
/// <summary>
/// Gets the path of the CRT kernel source as a single-quoted token that can be
/// placed directly after <c>source=</c> in an FFmpeg filtergraph.
/// </summary>
/// <remarks>
/// FFmpeg unescapes a filter description twice: once at the filtergraph level and
/// once when the filter parses its options. The returned value is wrapped in single
/// quotes so that the <c>\:</c> escape survives the first level and protects the
/// colon (e.g. a Windows drive letter) at the second; the quotes also keep
/// <c>,</c>, <c>;</c>, <c>[</c> and <c>]</c> in the path from being read as
/// filtergraph syntax.
/// </remarks>
/// <returns>The quoted, escaped kernel path.</returns>
private string GetCrtEscapedShaderPath()
{
    var escaped = Path.Combine(AppContext.BaseDirectory, "Resources", "Shaders", "crt_lottes.cl")
        .Replace("\\", "/", StringComparison.Ordinal)
        .Replace(":", "\\:", StringComparison.Ordinal)
        // Close the quote, emit an escaped quote for the option parser, reopen the quote.
        .Replace("'", @"'\\\''", StringComparison.Ordinal);

    return "'" + escaped + "'";
}
/// <summary>
/// Builds the OpenCL compiler options that select the shadow mask variant.
/// </summary>
/// <param name="state">The encoding job whose <c>crtShadowMask</c> stream option is read.</param>
/// <returns>
/// <c>-DSHADOW_MASK=n</c>, where <c>n</c> is the requested variant when it is an
/// integer from 0 to 4, and 2 when the option is missing, not a number or out of range.
/// </returns>
private string GetCrtBuildOpts(EncodingJobInfo state)
{
    const int DefaultShadowMask = 2;

    // The option is machine-readable input, so parse it independently of the server's culture.
    var shadowMask = state.BaseRequest.StreamOptions.TryGetValue("crtShadowMask", out var raw)
        && int.TryParse(
            raw,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var parsed)
        && parsed is >= 0 and <= 4
            ? parsed
            : DefaultShadowMask;

    return FormattableString.Invariant($"-DSHADOW_MASK={shadowMask}");
}
/// <summary>
/// Gets the CRT shader filters for a pipeline whose frames are already in an
/// OpenCL hardware context.
/// </summary>
/// <param name="state">The encoding job.</param>
/// <returns>
/// Three filters, in order: <c>scale_opencl</c> to RGBA, <c>program_opencl</c>
/// running the <c>crt_lottes</c> kernel, and <c>scale_opencl</c> back to NV12.
/// An empty list when the shader is disabled.
/// </returns>
public List<string> GetCrtShaderOclFilters(EncodingJobInfo state)
{
    var filters = new List<string>();

    if (IsCrtShaderEnabled(state))
    {
        var kernelSource = GetCrtEscapedShaderPath();
        var compilerFlags = GetCrtBuildOpts(state);

        // The kernel works on RGBA, so convert before it and restore NV12 after it.
        filters.Add("scale_opencl=w=iw:h=ih:format=rgba");
        filters.Add("program_opencl=source=" + kernelSource + ":kernel=crt_lottes:build_opts='" + compilerFlags + "'");
        filters.Add("scale_opencl=w=iw:h=ih:format=nv12");
    }

    return filters;
}
/// <summary>
/// Gets the CRT shader filter chain for the software pipeline, where frames
/// live in system memory.
/// </summary>
/// <param name="state">The encoding job.</param>
/// <returns>
/// A single comma-separated chain that converts to RGBA, uploads to OpenCL, runs
/// the <c>crt_lottes</c> kernel, downloads the frame and converts it to yuv420p.
/// An empty string when the shader is disabled.
/// </returns>
public string GetCrtShaderFilter(EncodingJobInfo state)
{
    if (IsCrtShaderEnabled(state))
    {
        var kernelSource = GetCrtEscapedShaderPath();
        var compilerFlags = GetCrtBuildOpts(state);

        string[] stages =
        [
            "format=rgba",
            "hwupload=derive_device=opencl",
            "program_opencl=source=" + kernelSource + ":kernel=crt_lottes:build_opts='" + compilerFlags + "'",
            "hwdownload",
            "format=yuv420p"
        ];

        return string.Join(",", stages);
    }

    return string.Empty;
}
/// <summary>
/// Gets the parameter of software filter chain.
/// </summary>
@@ -3829,6 +3899,14 @@ namespace MediaBrowser.Controller.MediaEncoding
overlayFilters.Add("overlay=eof_action=pass:repeatlast=0");
}
// CRT-Lottes OpenCL post-processing (applied after subtitle burn-in,
// only when explicitly requested via streamOptions[crtShader]=true).
var crtFilter = GetCrtShaderFilter(state);
if (!string.IsNullOrEmpty(crtFilter))
{
mainFilters.Add(crtFilter);
}
return (mainFilters, subFilters, overlayFilters);
}
@@ -5084,6 +5162,10 @@ namespace MediaBrowser.Controller.MediaEncoding
{
var tonemapFilter = GetHwTonemapFilter(options, "opencl", "nv12", isMjpegEncoder);
mainFilters.Add(tonemapFilter);
// CRT shader inline — frame is already in OpenCL NV12, zero PCIe transfer.
// scale_opencl converts NV12→RGBA for the kernel, then back to NV12.
mainFilters.AddRange(GetCrtShaderOclFilters(state));
}
if (doOclTonemap && isVaInVaOut)
@@ -5093,6 +5175,15 @@ namespace MediaBrowser.Controller.MediaEncoding
mainFilters.Add("hwmap=derive_device=vaapi:mode=write:reverse=1");
mainFilters.Add("format=vaapi");
}
else if (!doOclTonemap && isVaInVaOut && IsCrtShaderEnabled(state))
{
// CRT shader for pure-VAAPI path (no OCL tonemap already done).
// VAAPI → OpenCL (GPU-internal via hwmap, no PCIe) → CRT → back to VAAPI.
mainFilters.Add("hwmap=derive_device=opencl:mode=read");
mainFilters.AddRange(GetCrtShaderOclFilters(state));
mainFilters.Add("hwmap=derive_device=vaapi:mode=write:reverse=1");
mainFilters.Add("format=vaapi");
}
var memoryOutput = false;
var isUploadForOclTonemap = isSwDecoder && doOclTonemap;
@@ -5103,7 +5194,19 @@ namespace MediaBrowser.Controller.MediaEncoding
// OUTPUT nv12 surface(memory)
// prefer hwmap to hwdownload on opencl/vaapi.
mainFilters.Add(isHwmapNotUsable ? "hwdownload" : "hwmap=mode=read");
if (isVaapiDecoder && isSwEncoder && !doOclTonemap && IsCrtShaderEnabled(state))
{
// Frame still in VAAPI (no prior OCL step). Route through OpenCL for CRT,
// then download to CPU in one step (avoids double PCIe vs. SW-path CRT).
mainFilters.Add("hwmap=derive_device=opencl:mode=read");
mainFilters.AddRange(GetCrtShaderOclFilters(state));
mainFilters.Add("hwdownload");
}
else
{
mainFilters.Add(isHwmapNotUsable ? "hwdownload" : "hwmap=mode=read");
}
mainFilters.Add("format=nv12");
}
@@ -5343,6 +5446,15 @@ namespace MediaBrowser.Controller.MediaEncoding
// clear the surf->meta_offset and output nv12
mainFilters.Add("scale_vaapi=format=nv12");
// CRT shader via VAAPI→OpenCL→VAAPI round-trip (all in GPU VRAM).
if (IsCrtShaderEnabled(state))
{
mainFilters.Add("hwmap=derive_device=opencl:mode=read");
mainFilters.AddRange(GetCrtShaderOclFilters(state));
mainFilters.Add("hwmap=derive_device=vaapi:mode=write:reverse=1");
mainFilters.Add("format=vaapi");
}
// hw deint
if (doDeintH2645)
{
@@ -5356,7 +5468,19 @@ namespace MediaBrowser.Controller.MediaEncoding
// OUTPUT nv12 surface(memory)
if (isSwEncoder && (doVkTonemap || isVaapiDecoder))
{
mainFilters.Add("hwdownload");
if (IsCrtShaderEnabled(state) && !doVkTonemap)
{
// Frame in VAAPI (no Vulkan tonemap done). Route through OpenCL for CRT
// then download to CPU in one step.
mainFilters.Add("hwmap=derive_device=opencl:mode=read");
mainFilters.AddRange(GetCrtShaderOclFilters(state));
mainFilters.Add("hwdownload");
}
else
{
mainFilters.Add("hwdownload");
}
mainFilters.Add("format=nv12");
}