fix: 修复文件列表为空时的返回值处理并添加图像处理相关依赖

This commit is contained in:
Megghy
2025-09-29 13:41:32 +08:00
parent 7abe753401
commit 360cc79e18
12 changed files with 1913 additions and 0 deletions

453
Modules/CaptchaClick.cs Normal file
View File

@@ -0,0 +1,453 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using System.Numerics;
using System.Drawing;
using SessionOptions = Microsoft.ML.OnnxRuntime.SessionOptions;
using PointF = SixLabors.ImageSharp.PointF;
namespace CaptchaBreaker
{
/// <summary>
/// Solves a "click the characters in order" captcha using two ONNX models:
/// a YOLO detector that locates character boxes, and a Siamese network that
/// produces feature vectors used to match question glyphs to answer glyphs.
/// </summary>
/// <remarks>
/// Pipeline: pad the image onto a <see cref="Canvas"/>×<see cref="Canvas"/> canvas, detect boxes,
/// split them into answers (above <see cref="SplitY"/>) and questions (at or below it), crop each box to
/// <see cref="Patch"/>×<see cref="Patch"/>, extract features, build a question×answer Euclidean cost matrix
/// and solve the assignment with <see cref="Hungarian.Solve"/>.
/// </remarks>
public sealed class ChineseClick0 : IDisposable
{
    /// <summary>Side length of the square canvas fed to the detector.</summary>
    private const int Canvas = 384;

    /// <summary>Side length of the square patches fed to the Siamese network.</summary>
    private const int Patch = 96;

    /// <summary>Detections with confidence at or below this value are discarded.</summary>
    private const float ConfThreshold = 0.5f;

    /// <summary>Boxes whose top edge is above this y are answers; the rest are question glyphs.</summary>
    private const float SplitY = 344f;

    private readonly InferenceSession _yolo;
    private readonly InferenceSession _siamese;

    // Non-null only when this instance created the options itself and must therefore dispose them.
    private readonly SessionOptions? _ownedOptions;

    private bool _disposed;

    /// <summary>Loads both models.</summary>
    /// <param name="yoloModelPath">Path to the YOLO detector ONNX file.</param>
    /// <param name="siameseModelPath">Path to the Siamese feature-extractor ONNX file.</param>
    /// <param name="options">
    /// Session options shared by both sessions. When <c>null</c>, default options are created
    /// (and disposed together with this instance). Caller-supplied options stay owned by the caller.
    /// </param>
    /// <exception cref="FileNotFoundException">Either model file does not exist.</exception>
    public ChineseClick0(string yoloModelPath, string siameseModelPath, SessionOptions? options = null)
    {
        if (!File.Exists(yoloModelPath))
        {
            throw new FileNotFoundException("YOLO 模型未找到", yoloModelPath);
        }

        if (!File.Exists(siameseModelPath))
        {
            throw new FileNotFoundException("Siamese 模型未找到", siameseModelPath);
        }

        if (options is null)
        {
            // Default options (CPU execution provider per the original author's note).
            _ownedOptions = new SessionOptions();
            options = _ownedOptions;
        }

        try
        {
            _yolo = new InferenceSession(yoloModelPath, options);
            _siamese = new InferenceSession(siameseModelPath, options);
        }
        catch
        {
            // Do not leak native handles if the second session (or the first) fails to load.
            _yolo?.Dispose();
            _ownedOptions?.Dispose();
            throw;
        }
    }

    /// <summary>Loads the image at <paramref name="imagePath"/> and runs the solver on it.</summary>
    /// <returns>See <see cref="Run(Image{Rgba32})"/>.</returns>
    public List<PointF> Run(string imagePath)
    {
        using var img = Image.Load<Rgba32>(imagePath);
        return Run(img);
    }

    /// <summary>Runs detection and matching on an already-loaded image.</summary>
    /// <param name="raw">Captcha image; neither side may exceed <see cref="Canvas"/> pixels.</param>
    /// <returns>
    /// One point per question glyph (left to right), each being the centre of the answer box
    /// assigned to that glyph. Empty when no answer box or no question box was detected.
    /// Coordinates are in the input image's pixel space (the image is placed at the canvas origin unscaled).
    /// </returns>
    /// <exception cref="InvalidOperationException">The image is larger than the canvas.</exception>
    /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
    public List<PointF> Run(Image<Rgba32> raw)
    {
        ArgumentNullException.ThrowIfNull(raw);
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(ChineseClick0));
        }

        if (raw.Width > Canvas || raw.Height > Canvas)
        {
            throw new InvalidOperationException("不能输入大于384长宽的图片!");
        }

        using var image = PreprocessToCanvas(raw);

        // 1. YOLO detection.
        var boxes = Detect(image);

        // 2. Separate answer boxes from question boxes, each ordered left to right.
        var (ans, ques) = SplitBoxes(boxes);
        if (ans.Count == 0 || ques.Count == 0)
        {
            return new List<PointF>();
        }

        // 3. Crop and resize every box to Patch×Patch; batch order is [answers..., questions...].
        var (batch, dims) = CropAndResizeBatch(image, ans, ques);

        // 4. Siamese feature extraction.
        var feats = ExtractFeatures(batch, dims);

        // 5. Cost matrix, rows = questions, columns = answers.
        var cost = BuildCostMatrix(feats, ans.Count);

        // 6. Assignment: one answer index per question.
        var assign = Hungarian.Solve(cost);

        // 7. Centre points of the matched answer boxes, in question order.
        return GenerateResults(ans, assign);
    }

    /// <summary>Draws <paramref name="src"/> at the top-left of an opaque black Canvas×Canvas image.</summary>
    private static Image<Rgba32> PreprocessToCanvas(Image<Rgba32> src)
    {
        var canvas = new Image<Rgba32>(Canvas, Canvas, new Rgba32(0, 0, 0, 255));
        canvas.Mutate(ctx => ctx.DrawImage(src, new SixLabors.ImageSharp.Point(0, 0), 1f));
        return canvas;
    }

    /// <summary>
    /// Writes the top-left <paramref name="size"/>×<paramref name="size"/> pixels of <paramref name="source"/>
    /// into <paramref name="target"/>[<paramref name="batchIndex"/>, c, y, x] as RGB planes scaled to 0..1.
    /// </summary>
    private static void WriteImageToTensor(Image<Rgba32> source, DenseTensor<float> target, int batchIndex, int size)
    {
        source.ProcessPixelRows(rows =>
        {
            for (int y = 0; y < size; y++)
            {
                var row = rows.GetRowSpan(y);
                for (int x = 0; x < size; x++)
                {
                    var pixel = row[x];
                    target[batchIndex, 0, y, x] = pixel.R / 255f;
                    target[batchIndex, 1, y, x] = pixel.G / 255f;
                    target[batchIndex, 2, y, x] = pixel.B / 255f;
                }
            }
        });
    }

    /// <summary>Runs the detector on a Canvas×Canvas image and returns boxes above the confidence threshold.</summary>
    /// <exception cref="NotSupportedException">The model output is neither [1,N,6] nor [1,6,N].</exception>
    private List<BBox> Detect(Image<Rgba32> img)
    {
        // Input: CHW float32 [1,3,Canvas,Canvas], values in 0..1.
        var input = new DenseTensor<float>(new[] { 1, 3, Canvas, Canvas });
        WriteImageToTensor(img, input, 0, Canvas);

        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("images", input)
        };
        using var results = _yolo.Run(inputs);

        // Output is assumed to hold 6 values per box: (x_min, y_min, x_max, y_max, conf, cls).
        var output = results.FirstOrDefault(r => r.Name == "output0") ?? results.First();
        var t = output.AsTensor<float>();
        var dims = t.Dimensions;

        // Accept either [1,N,6] (box-major) or [1,6,N] (attribute-major); box-major wins when both fit.
        bool singleBatch = dims.Length == 3 && dims[0] == 1;
        bool boxMajor = singleBatch && dims[2] == 6;
        bool attrMajor = !boxMajor && singleBatch && dims[1] == 6;
        if (!boxMajor && !attrMajor)
        {
            throw new NotSupportedException($"不支持的 YOLO 输出维度:[{string.Join(",", dims.ToArray())}]");
        }

        int count = boxMajor ? dims[1] : dims[2];
        float At(int box, int attr) => boxMajor ? t[0, box, attr] : t[0, attr, box];

        var list = new List<BBox>();
        for (int i = 0; i < count; i++)
        {
            float conf = At(i, 4);
            if (conf <= ConfThreshold)
            {
                continue;
            }

            list.Add(new BBox(At(i, 0), At(i, 1), At(i, 2), At(i, 3), conf, At(i, 5)));
        }

        return list;
    }

    /// <summary>
    /// Sorts <paramref name="boxes"/> in place by left edge, then partitions them:
    /// top edge above <see cref="SplitY"/> → answers, otherwise → questions.
    /// </summary>
    private static (List<BBox> ans, List<BBox> ques) SplitBoxes(List<BBox> boxes)
    {
        boxes.Sort((a, b) => a.XMin.CompareTo(b.XMin));

        var ans = new List<BBox>();
        var ques = new List<BBox>();
        foreach (var box in boxes)
        {
            (box.YMin < SplitY ? ans : ques).Add(box);
        }

        return (ans, ques);
    }

    /// <summary>
    /// Crops every box out of <paramref name="img"/>, resizes it to Patch×Patch (Lanczos3) and packs the
    /// results into one tensor ordered answers first, then questions.
    /// </summary>
    /// <returns>The batch tensor and its shape [total, 3, Patch, Patch].</returns>
    private static (DenseTensor<float> batch, int[] dims) CropAndResizeBatch(Image<Rgba32> img, List<BBox> ans, List<BBox> ques)
    {
        int total = ans.Count + ques.Count;
        var shape = new[] { total, 3, Patch, Patch };
        var tensor = new DenseTensor<float>(shape);

        int index = 0;
        foreach (var box in ans.Concat(ques))
        {
            var rect = ToSafeRect(box, img.Width, img.Height);
            using var cropped = img.Clone(ctx => ctx.Crop(rect).Resize(Patch, Patch, KnownResamplers.Lanczos3));
            WriteImageToTensor(cropped, tensor, index, Patch);
            index++;
        }

        return (tensor, new[] { total, 3, Patch, Patch });
    }

    /// <summary>Runs the Siamese model on the batch and copies its 2-D output into a managed array.</summary>
    /// <param name="batch">Input tensor, bound to the model input named "input".</param>
    /// <param name="dims">Shape of <paramref name="batch"/>; only the batch size (element 0) is checked.</param>
    /// <returns>Feature matrix of shape [batch, featureLength].</returns>
    /// <exception cref="NotSupportedException">The output is not 2-D or its first dimension differs from the batch size.</exception>
    private float[,] ExtractFeatures(DenseTensor<float> batch, int[] dims)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("input", batch)
        };
        using var results = _siamese.Run(inputs);
        var output = results.FirstOrDefault(r => r.Name == "output") ?? results.First();
        var t = output.AsTensor<float>();

        // Expected shape: [batch, feat].
        if (t.Dimensions.Length != 2 || t.Dimensions[0] != dims[0])
        {
            throw new NotSupportedException($"不支持的 Siamese 输出维度:[{string.Join(",", t.Dimensions.ToArray())}]");
        }

        int rows = t.Dimensions[0];
        int cols = t.Dimensions[1];
        var feats = new float[rows, cols];
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                feats[i, j] = t[i, j];
            }
        }

        return feats;
    }

    /// <summary>
    /// Builds the question×answer cost matrix: entry [i, j] is the Euclidean distance between the feature
    /// vector of question i (row <paramref name="ansCount"/> + i of <paramref name="feats"/>) and answer j (row j).
    /// </summary>
    private static float[,] BuildCostMatrix(float[,] feats, int ansCount)
    {
        int featDim = feats.GetLength(1);
        int quesCount = feats.GetLength(0) - ansCount;
        var cost = new float[quesCount, ansCount];

        for (int qi = 0; qi < quesCount; qi++)
        {
            int quesRow = ansCount + qi;
            for (int ai = 0; ai < ansCount; ai++)
            {
                float sumSq = 0f;
                for (int k = 0; k < featDim; k++)
                {
                    float diff = feats[quesRow, k] - feats[ai, k];
                    sumSq += diff * diff;
                }

                cost[qi, ai] = MathF.Sqrt(sumSq);
            }
        }

        return cost;
    }

    /// <summary>Maps each assigned answer index to the centre point of that answer box.</summary>
    private static List<PointF> GenerateResults(List<BBox> ans, int[] assign)
    {
        var res = new List<PointF>(assign.Length);
        foreach (int answerIndex in assign)
        {
            var box = ans[answerIndex];
            res.Add(new PointF((box.XMin + box.XMax) / 2f, (box.YMin + box.YMax) / 2f));
        }

        return res;
    }

    /// <summary>
    /// Converts a float box to an integer rectangle that lies inside a w×h image and is at least 1×1.
    /// </summary>
    /// <remarks>
    /// Edges are rounded outwards (floor for left/top, ceiling for right/bottom) and clamped independently,
    /// so a box that starts off-image keeps its true right/bottom edge instead of being shifted.
    /// </remarks>
    private static SixLabors.ImageSharp.Rectangle ToSafeRect(BBox b, int w, int h)
    {
        int left = Math.Clamp((int)MathF.Floor(b.XMin), 0, w - 1);
        int top = Math.Clamp((int)MathF.Floor(b.YMin), 0, h - 1);
        int right = Math.Clamp((int)MathF.Ceiling(b.XMax), left + 1, w);
        int bottom = Math.Clamp((int)MathF.Ceiling(b.YMax), top + 1, h);
        return new SixLabors.ImageSharp.Rectangle(left, top, right - left, bottom - top);
    }

    /// <summary>Releases both inference sessions and any session options created by this instance. Safe to call repeatedly.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _yolo.Dispose();
        _siamese.Dispose();
        _ownedOptions?.Dispose();
    }

    /// <summary>One detection as read from the detector output: corner coordinates, confidence and class value.</summary>
    private readonly record struct BBox(float XMin, float YMin, float XMax, float YMax, float Confidence, float Class);
}
/// <summary>
/// Minimum-cost assignment (Hungarian / Munkres algorithm). The input is a q×a cost matrix
/// (rows = questions, columns = answers); the result has length q and holds, for each row,
/// the column index it was matched to.
/// </summary>
public static class Hungarian
{
    /// <summary>Solves the assignment problem for <paramref name="cost"/>.</summary>
    /// <param name="cost">Cost matrix; every entry must be finite. It is not modified.</param>
    /// <returns>
    /// An array with one entry per row. Rows that take part in the optimal matching get their matched
    /// column. When there are more rows than columns, the surplus rows cannot be matched to a unique
    /// column; each of them is given the column with the lowest cost in that row (which may repeat a
    /// column used by another row). If the matrix has zero columns every entry is -1.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="cost"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="cost"/> contains NaN or an infinity. Such values can never be reduced to zero,
    /// so the algorithm would not terminate.
    /// </exception>
    public static int[] Solve(float[,] cost)
    {
        ArgumentNullException.ThrowIfNull(cost);

        int nRows = cost.GetLength(0);
        int nCols = cost.GetLength(1);
        var rowsol = new int[nRows];
        if (nRows == 0)
        {
            return rowsol;
        }

        if (nCols == 0)
        {
            Array.Fill(rowsol, -1);
            return rowsol;
        }

        foreach (float value in cost)
        {
            if (!float.IsFinite(value))
            {
                throw new ArgumentException("Cost matrix must contain only finite values.", nameof(cost));
            }
        }

        // Pad to a square matrix; padding cells cost 0 (the array default).
        int n = Math.Max(nRows, nCols);
        var a = new float[n, n];
        for (int i = 0; i < nRows; i++)
        {
            for (int j = 0; j < nCols; j++)
            {
                a[i, j] = cost[i, j];
            }
        }

        // Subtract each row's minimum from the row.
        for (int i = 0; i < n; i++)
        {
            float min = float.PositiveInfinity;
            for (int j = 0; j < n; j++) min = Math.Min(min, a[i, j]);
            for (int j = 0; j < n; j++) a[i, j] -= min;
        }

        // Subtract each column's minimum from the column.
        for (int j = 0; j < n; j++)
        {
            float min = float.PositiveInfinity;
            for (int i = 0; i < n; i++) min = Math.Min(min, a[i, j]);
            for (int i = 0; i < n; i++) a[i, j] -= min;
        }

        var starred = new bool[n, n];
        var primed = new bool[n, n];
        var rowCovered = new bool[n];
        var colCovered = new bool[n];

        // Initial matching: in each row star the first zero whose column has no star yet.
        var colHasStar = new bool[n];
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (a[i, j] == 0 && !colHasStar[j])
                {
                    starred[i, j] = true;
                    colHasStar[j] = true;
                    break;
                }
            }
        }

        CoverStarredColumns(starred, colCovered, n);

        // Finished once every column contains a starred zero.
        while (!AllCovered(colCovered))
        {
            (int r, int c) = FindZero(a, rowCovered, colCovered, n);
            while (r == -1)
            {
                // No uncovered zero: shift values so that the smallest uncovered entry becomes exactly 0.
                AdjustMatrix(a, rowCovered, colCovered, n);
                (r, c) = FindZero(a, rowCovered, colCovered, n);
            }

            primed[r, c] = true;
            int starCol = FindStarInRow(starred, r, n);
            if (starCol != -1)
            {
                // The row already has a star: cover the row, release the star's column, keep searching.
                rowCovered[r] = true;
                colCovered[starCol] = false;
                continue;
            }

            // Augmenting path starting at this primed zero: prime, star in its column,
            // prime in that star's row, ... until a prime whose column has no star.
            var path = new List<(int r, int c)> { (r, c) };
            int col = c;
            while (true)
            {
                int row = FindStarInCol(starred, col, n);
                if (row == -1)
                {
                    break;
                }

                path.Add((row, col));
                col = FindPrimeInRow(primed, row, n);
                path.Add((row, col));
            }

            // Flip along the path: stars become plain zeros, primes become stars.
            foreach (var (rr, cc) in path)
            {
                starred[rr, cc] = !starred[rr, cc];
            }

            // Reset primes and covers, then cover the columns of the enlarged matching.
            Array.Clear(rowCovered, 0, n);
            Array.Clear(primed, 0, primed.Length);
            CoverStarredColumns(starred, colCovered, n);
        }

        for (int i = 0; i < nRows; i++)
        {
            int j = FindStarInRow(starred, i, n);
            // A star in a padding column means the row is unmatched; fall back to its cheapest real column.
            rowsol[i] = (j >= 0 && j < nCols) ? j : CheapestColumn(cost, i, nCols);
        }

        return rowsol;
    }

    /// <summary>Index of the lowest-cost column in <paramref name="row"/> (first one on ties).</summary>
    private static int CheapestColumn(float[,] cost, int row, int nCols)
    {
        int best = 0;
        for (int j = 1; j < nCols; j++)
        {
            if (cost[row, j] < cost[row, best])
            {
                best = j;
            }
        }

        return best;
    }

    /// <summary>Clears all column covers, then covers every column that contains a star.</summary>
    private static void CoverStarredColumns(bool[,] starred, bool[] colCovered, int n)
    {
        Array.Clear(colCovered, 0, colCovered.Length);
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (starred[i, j])
                {
                    colCovered[j] = true;
                }
            }
        }
    }

    /// <summary>First zero (row-major order) in an uncovered row and uncovered column, or (-1, -1).</summary>
    private static (int, int) FindZero(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j] && a[i, j] == 0)
                {
                    return (i, j);
                }
            }
        }

        return (-1, -1);
    }

    /// <summary>
    /// Adds the smallest uncovered value to every covered row and subtracts it from every uncovered column.
    /// </summary>
    private static void AdjustMatrix(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        float min = float.PositiveInfinity;
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j])
                {
                    min = Math.Min(min, a[i, j]);
                }
            }
        }

        for (int i = 0; i < n; i++)
        {
            if (!rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                a[i, j] += min;
            }
        }

        for (int j = 0; j < n; j++)
        {
            if (colCovered[j])
            {
                continue;
            }

            for (int i = 0; i < n; i++)
            {
                a[i, j] -= min;
            }
        }
    }

    /// <summary>Column of the first star in row <paramref name="r"/>, or -1.</summary>
    private static int FindStarInRow(bool[,] starred, int r, int n)
    {
        for (int j = 0; j < n; j++)
        {
            if (starred[r, j]) return j;
        }

        return -1;
    }

    /// <summary>Row of the first star in column <paramref name="c"/>, or -1.</summary>
    private static int FindStarInCol(bool[,] starred, int c, int n)
    {
        for (int i = 0; i < n; i++)
        {
            if (starred[i, c]) return i;
        }

        return -1;
    }

    /// <summary>Column of the first prime in row <paramref name="r"/>, or -1.</summary>
    private static int FindPrimeInRow(bool[,] primed, int r, int n)
    {
        for (int j = 0; j < n; j++)
        {
            if (primed[r, j]) return j;
        }

        return -1;
    }

    /// <summary>True when every entry of <paramref name="covered"/> is set.</summary>
    private static bool AllCovered(bool[] covered) => Array.TrueForAll(covered, isCovered => isCovered);
}
}