Files
MegghysAPI/Modules/CaptchaClick.cs

453 lines
16 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using System.Numerics;
using System.Drawing;
using SessionOptions = Microsoft.ML.OnnxRuntime.SessionOptions;
using PointF = SixLabors.ImageSharp.PointF;
namespace CaptchaBreaker
{
/// <summary>
/// Solves a "click the characters" captcha image: a YOLO session detects character boxes,
/// a Siamese session embeds each box, and the question boxes are matched to answer boxes
/// by minimum total Euclidean distance between embeddings.
/// </summary>
public sealed class ChineseClick0 : IDisposable
{
    /// <summary>Side length of the square canvas the detector is fed.</summary>
    private const int Canvas = 384;

    /// <summary>Side length of the square patches fed to the Siamese model.</summary>
    private const int Patch = 96;

    /// <summary>Detections must have a confidence strictly greater than this value.</summary>
    private const float ConfThreshold = 0.5f;

    /// <summary>Boxes whose top edge is above this y are answers; the rest are questions.</summary>
    private const float SplitY = 344f;

    private readonly InferenceSession _yolo;
    private readonly InferenceSession _siamese;

    /// <summary>Options created by this instance (null when the caller supplied its own).</summary>
    private readonly SessionOptions? _ownedOptions;

    /// <summary>Loads both ONNX models.</summary>
    /// <param name="yoloModelPath">Path of the detector model file.</param>
    /// <param name="siameseModelPath">Path of the embedding model file.</param>
    /// <param name="options">
    /// Session options shared by both sessions. When null, a default instance is created
    /// and disposed together with this object; a caller-supplied instance is never disposed here.
    /// </param>
    /// <exception cref="FileNotFoundException">Either model file does not exist.</exception>
    public ChineseClick0(string yoloModelPath, string siameseModelPath, SessionOptions? options = null)
    {
        if (!File.Exists(yoloModelPath))
        {
            throw new FileNotFoundException("YOLO 模型未找到", yoloModelPath);
        }

        if (!File.Exists(siameseModelPath))
        {
            throw new FileNotFoundException("Siamese 模型未找到", siameseModelPath);
        }

        SessionOptions? ownedOptions = null;
        if (options is null)
        {
            ownedOptions = new SessionOptions(); // default: CPU execution provider
            options = ownedOptions;
        }

        InferenceSession? yolo = null;
        try
        {
            yolo = new InferenceSession(yoloModelPath, options);
            _siamese = new InferenceSession(siameseModelPath, options);
            _yolo = yolo;
            _ownedOptions = ownedOptions;
        }
        catch
        {
            // Do not leak the first session (or our own options) when the second load fails.
            yolo?.Dispose();
            ownedOptions?.Dispose();
            throw;
        }
    }

    /// <summary>Loads the image at <paramref name="imagePath"/> and solves it.</summary>
    /// <param name="imagePath">Path of the captcha image.</param>
    /// <returns>See <see cref="Run(Image{Rgba32})"/>.</returns>
    public List<PointF> Run(string imagePath)
    {
        using var img = Image.Load<Rgba32>(imagePath);
        return Run(img);
    }

    /// <summary>Solves an already-decoded captcha image.</summary>
    /// <param name="raw">Image no larger than 384x384; it is not modified.</param>
    /// <returns>
    /// One point per detected question box, in left-to-right question order: the centre of the
    /// answer box assigned to that question. Empty when no answer box or no question box is detected.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="raw"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The image is wider or taller than 384 pixels.</exception>
    public List<PointF> Run(Image<Rgba32> raw)
    {
        ArgumentNullException.ThrowIfNull(raw);
        if (raw.Width > Canvas || raw.Height > Canvas)
        {
            throw new InvalidOperationException("不能输入大于384长宽的图片!");
        }

        using var image = PreprocessToCanvas(raw);

        // 1. YOLO detection.
        var boxes = Detect(image);

        // 2. Separate answer boxes from question boxes, each sorted left to right.
        var (ans, ques) = SplitBoxes(boxes);
        if (ans.Count == 0 || ques.Count == 0)
        {
            return new List<PointF>();
        }

        // 3. Crop and resize every box to 96x96; batch order is [answers..., questions...].
        var (batch, dims) = CropAndResizeBatch(image, ans, ques);

        // 4. Siamese feature extraction.
        var feats = ExtractFeatures(batch, dims);

        // 5. Cost matrix, question x answer.
        var cost = BuildCostMatrix(feats, ans.Count);

        // 6. Hungarian assignment: one answer index per question.
        var assign = Hungarian.Solve(cost);

        // 7. Centre points of the assigned answer boxes, in question order.
        return GenerateResults(ans, assign);
    }

    /// <summary>Draws <paramref name="src"/> at the top-left of an opaque black 384x384 canvas.</summary>
    private static Image<Rgba32> PreprocessToCanvas(Image<Rgba32> src)
    {
        var canvas = new Image<Rgba32>(Canvas, Canvas, new Rgba32(0, 0, 0, 255));
        try
        {
            canvas.Mutate(ctx => ctx.DrawImage(src, new SixLabors.ImageSharp.Point(0, 0), 1f));
            return canvas;
        }
        catch
        {
            // The caller never receives the canvas on failure, so release it here.
            canvas.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Writes the RGB channels of every pixel of <paramref name="source"/>, scaled to 0..1,
    /// into slot <paramref name="batchIndex"/> of an NCHW tensor.
    /// </summary>
    private static void CopyPixelsToTensor(Image<Rgba32> source, DenseTensor<float> target, int batchIndex)
    {
        int height = source.Height;
        int width = source.Width;
        source.ProcessPixelRows(accessor =>
        {
            for (int y = 0; y < height; y++)
            {
                var row = accessor.GetRowSpan(y);
                for (int x = 0; x < width; x++)
                {
                    var p = row[x];
                    target[batchIndex, 0, y, x] = p.R / 255f;
                    target[batchIndex, 1, y, x] = p.G / 255f;
                    target[batchIndex, 2, y, x] = p.B / 255f;
                }
            }
        });
    }

    /// <summary>
    /// Runs the detector on the canvas image and returns every box whose confidence exceeds
    /// <see cref="ConfThreshold"/>.
    /// </summary>
    /// <remarks>
    /// The output is read as (x_min, y_min, x_max, y_max, conf, cls) per box and is accepted
    /// in either [1, N, 6] or [1, 6, N] layout; [1, N, 6] wins when both match.
    /// </remarks>
    /// <exception cref="NotSupportedException">The output tensor has any other shape.</exception>
    private List<BBox> Detect(Image<Rgba32> img)
    {
        // CHW float32 input [1, 3, 384, 384] with values in 0..1.
        var input = new DenseTensor<float>(new[] { 1, 3, Canvas, Canvas });
        CopyPixelsToTensor(img, input, 0);

        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("images", input)
        };
        using var results = _yolo.Run(inputs);

        // Prefer the output named "output0"; otherwise fall back to the first output.
        var output = results.FirstOrDefault(r => r.Name == "output0") ?? results.First();
        var t = output.AsTensor<float>();
        var dims = t.Dimensions;

        bool boxMajor;
        int count;
        if (dims.Length == 3 && dims[0] == 1 && dims[2] == 6)
        {
            boxMajor = true;
            count = dims[1];
        }
        else if (dims.Length == 3 && dims[0] == 1 && dims[1] == 6)
        {
            boxMajor = false;
            count = dims[2];
        }
        else
        {
            throw new NotSupportedException($"不支持的 YOLO 输出维度:[{string.Join(",", dims.ToArray())}]");
        }

        float Field(int box, int field) => boxMajor ? t[0, box, field] : t[0, field, box];

        var list = new List<BBox>();
        for (int i = 0; i < count; i++)
        {
            float conf = Field(i, 4);

            // Negated '>' so that a NaN confidence is rejected instead of slipping through.
            if (!(conf > ConfThreshold))
            {
                continue;
            }

            list.Add(new BBox(Field(i, 0), Field(i, 1), Field(i, 2), Field(i, 3), conf, Field(i, 5)));
        }

        return list;
    }

    /// <summary>
    /// Sorts <paramref name="boxes"/> in place by left edge and partitions them: boxes whose
    /// top edge is above <see cref="SplitY"/> are answers, all others are questions.
    /// </summary>
    private static (List<BBox> ans, List<BBox> ques) SplitBoxes(List<BBox> boxes)
    {
        boxes.Sort((a, b) => a.XMin.CompareTo(b.XMin)); // left to right
        var ans = new List<BBox>();
        var ques = new List<BBox>();
        foreach (var b in boxes)
        {
            if (b.YMin < SplitY)
            {
                ans.Add(b);
            }
            else
            {
                ques.Add(b);
            }
        }

        return (ans, ques);
    }

    /// <summary>
    /// Crops every box out of <paramref name="img"/>, resizes it to 96x96 with Lanczos3 and
    /// packs the patches into one NCHW tensor ordered [answers..., questions...].
    /// </summary>
    /// <returns>The batch tensor and its dimensions [total, 3, 96, 96].</returns>
    private static (DenseTensor<float> batch, int[] dims) CropAndResizeBatch(Image<Rgba32> img, List<BBox> ans, List<BBox> ques)
    {
        int a = ans.Count, q = ques.Count, total = a + q;
        var tensor = new DenseTensor<float>(new[] { total, 3, Patch, Patch });

        void ProcessOne(int index, BBox b)
        {
            var rect = ToSafeRect(b, img.Width, img.Height);
            using var cropped = img.Clone(ctx => ctx.Crop(rect).Resize(Patch, Patch, KnownResamplers.Lanczos3));
            CopyPixelsToTensor(cropped, tensor, index);
        }

        for (int i = 0; i < a; i++)
        {
            ProcessOne(i, ans[i]);
        }

        for (int i = 0; i < q; i++)
        {
            ProcessOne(a + i, ques[i]);
        }

        return (tensor, new[] { total, 3, Patch, Patch });
    }

    /// <summary>Runs the Siamese model on the batch and copies its output into a 2-D array.</summary>
    /// <param name="batch">Input tensor, passed to the model under the name "input".</param>
    /// <param name="dims">Batch dimensions; only the first entry (batch size) is checked.</param>
    /// <returns>A [batch, feature] array.</returns>
    /// <exception cref="NotSupportedException">
    /// The output is not 2-D or its first dimension differs from the batch size.
    /// </exception>
    private float[,] ExtractFeatures(DenseTensor<float> batch, int[] dims)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("input", batch)
        };
        using var results = _siamese.Run(inputs);

        // Prefer the output named "output"; otherwise fall back to the first output.
        var output = results.FirstOrDefault(r => r.Name == "output") ?? results.First();
        var t = output.AsTensor<float>();

        // Expected shape: [batch, feat].
        if (t.Dimensions.Length != 2 || t.Dimensions[0] != dims[0])
        {
            throw new NotSupportedException($"不支持的 Siamese 输出维度:[{string.Join(",", t.Dimensions.ToArray())}]");
        }

        int rows = t.Dimensions[0];
        int cols = t.Dimensions[1];
        var feats = new float[rows, cols];
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                feats[i, j] = t[i, j];
            }
        }

        return feats;
    }

    /// <summary>
    /// Builds the question x answer cost matrix: entry [i, j] is the Euclidean distance between
    /// the feature row of question i and the feature row of answer j.
    /// </summary>
    /// <param name="feats">Feature rows ordered [answers..., questions...].</param>
    /// <param name="ansCount">Number of leading rows that belong to answers.</param>
    private static float[,] BuildCostMatrix(float[,] feats, int ansCount)
    {
        int total = feats.GetLength(0);
        int featDim = feats.GetLength(1);
        int q = total - ansCount;
        int a = ansCount;
        var cost = new float[q, a];
        for (int i = 0; i < q; i++)
        {
            for (int j = 0; j < a; j++)
            {
                float sum = 0f;
                for (int k = 0; k < featDim; k++)
                {
                    float d = feats[ansCount + i, k] - feats[j, k]; // question - answer
                    sum += d * d;
                }

                cost[i, j] = MathF.Sqrt(sum);
            }
        }

        return cost;
    }

    /// <summary>Maps each assigned answer index to the centre point of that answer box.</summary>
    private static List<PointF> GenerateResults(List<BBox> ans, int[] assign)
    {
        var res = new List<PointF>(assign.Length);
        foreach (var aIdx in assign)
        {
            var b = ans[aIdx];
            res.Add(new PointF((b.XMin + b.XMax) / 2f, (b.YMin + b.YMax) / 2f));
        }

        return res;
    }

    /// <summary>
    /// Converts a box to an integer rectangle that lies inside a <paramref name="w"/> x
    /// <paramref name="h"/> image and is at least 1x1: the origin is the floored top-left
    /// corner, the size is the ceiling of the box extent, both clamped to the image.
    /// </summary>
    private static SixLabors.ImageSharp.Rectangle ToSafeRect(BBox b, int w, int h)
    {
        int x = Math.Clamp((int)MathF.Floor(b.XMin), 0, w - 1);
        int y = Math.Clamp((int)MathF.Floor(b.YMin), 0, h - 1);
        int rw = Math.Clamp((int)MathF.Ceiling(b.XMax - b.XMin), 1, w - x);
        int rh = Math.Clamp((int)MathF.Ceiling(b.YMax - b.YMin), 1, h - y);
        return new SixLabors.ImageSharp.Rectangle(x, y, rw, rh);
    }

    /// <summary>Disposes both inference sessions and any options this instance created.</summary>
    public void Dispose()
    {
        _yolo.Dispose();
        _siamese.Dispose();
        _ownedOptions?.Dispose();
    }

    /// <summary>One detection: corner coordinates, confidence and class value as read from the detector output.</summary>
    private readonly record struct BBox(float XMin, float YMin, float XMax, float YMax, float Confidence, float Class);
}
/// <summary>
/// Minimum-cost assignment (Hungarian / Munkres algorithm) over a rows x columns cost matrix.
/// </summary>
public static class Hungarian
{
    /// <summary>Finds the assignment of rows to distinct columns with the smallest total cost.</summary>
    /// <param name="cost">Cost matrix; <c>cost[i, j]</c> is the cost of giving row i column j.</param>
    /// <returns>
    /// An array with one entry per row holding the assigned column index. When there are more
    /// rows than columns, the rows that end up on a padding column receive the fallback index
    /// <c>Math.Min(i, columns - 1)</c> (which is -1 when the matrix has no columns); that
    /// fallback is not a cost-based match.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="cost"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="cost"/> contains NaN.</exception>
    /// <exception cref="InvalidOperationException">
    /// Non-finite costs make it impossible to reach a complete assignment.
    /// </exception>
    public static int[] Solve(float[,] cost)
    {
        ArgumentNullException.ThrowIfNull(cost);

        // A single NaN spreads over the whole matrix during reduction, after which no zero
        // can ever be found and the main loop would never terminate.
        foreach (float value in cost)
        {
            if (float.IsNaN(value))
            {
                throw new ArgumentException("Cost matrix must not contain NaN.", nameof(cost));
            }
        }

        int nRows = cost.GetLength(0);
        int nCols = cost.GetLength(1);

        // Pad with zero-cost cells to a square n x n matrix.
        int n = Math.Max(nRows, nCols);
        var a = new float[n, n];
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                a[i, j] = (i < nRows && j < nCols) ? cost[i, j] : 0f;
            }
        }

        // Row reduction: subtract each row's minimum.
        for (int i = 0; i < n; i++)
        {
            float min = float.PositiveInfinity;
            for (int j = 0; j < n; j++)
            {
                min = Math.Min(min, a[i, j]);
            }

            for (int j = 0; j < n; j++)
            {
                a[i, j] -= min;
            }
        }

        // Column reduction: subtract each column's minimum.
        for (int j = 0; j < n; j++)
        {
            float min = float.PositiveInfinity;
            for (int i = 0; i < n; i++)
            {
                min = Math.Min(min, a[i, j]);
            }

            for (int i = 0; i < n; i++)
            {
                a[i, j] -= min;
            }
        }

        var starred = new bool[n, n];
        var primed = new bool[n, n];
        var rowCovered = new bool[n];
        var colCovered = new bool[n];

        // Initial stars: in each row, the first zero whose column has no star yet.
        // Comparing with exact 0 is intended: zeros arise from subtracting a value from itself.
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (a[i, j] == 0 && !RowHasStar(starred, i, n) && !ColHasStar(starred, j, n))
                {
                    starred[i, j] = true;
                    break;
                }
            }
        }

        CoverStarredColumns(starred, colCovered, n);

        // Every column covered means n independent stars, i.e. a complete assignment.
        while (CountTrue(colCovered) < n)
        {
            (int r, int c) = FindZero(a, rowCovered, colCovered, n);
            while (r == -1)
            {
                AdjustMatrix(a, rowCovered, colCovered, n);
                (r, c) = FindZero(a, rowCovered, colCovered, n);
            }

            primed[r, c] = true;
            int starCol = FindStarInRow(starred, r, n);
            if (starCol != -1)
            {
                // The row already has a star: cover the row and free that star's column.
                rowCovered[r] = true;
                colCovered[starCol] = false;
            }
            else
            {
                // Alternating path starting at this primed zero:
                // primed zero -> star in its column -> primed zero in that star's row -> ...
                var path = new List<(int r, int c)> { (r, c) };
                int col = c;
                while (true)
                {
                    int row = FindStarInCol(starred, col, n);
                    if (row == -1)
                    {
                        break;
                    }

                    path.Add((row, col));
                    col = FindPrimeInRow(primed, row, n);
                    path.Add((row, col));
                }

                // Flip along the path: stars are removed, primed zeros become stars.
                foreach (var (rr, cc) in path)
                {
                    starred[rr, cc] = !starred[rr, cc];
                }

                // Reset primes and covers, then cover the columns of the new star set.
                Array.Clear(rowCovered, 0, n);
                Array.Clear(colCovered, 0, n);
                Array.Clear(primed, 0, primed.Length);
                CoverStarredColumns(starred, colCovered, n);
            }
        }

        // Read the stars back for the real rows only.
        var rowsol = new int[nRows];
        for (int i = 0; i < nRows; i++)
        {
            int j = FindStarInRow(starred, i, n);
            rowsol[i] = (j < nCols) ? j : Math.Min(i, nCols - 1);
        }

        return rowsol;
    }

    /// <summary>Resets <paramref name="colCovered"/> and covers every column that contains a star.</summary>
    private static void CoverStarredColumns(bool[,] starred, bool[] colCovered, int n)
    {
        Array.Clear(colCovered, 0, colCovered.Length);
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (starred[i, j])
                {
                    colCovered[j] = true;
                }
            }
        }
    }

    /// <summary>Returns the first zero in an uncovered row and column (row-major), or (-1, -1).</summary>
    private static (int, int) FindZero(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j] && a[i, j] == 0)
                {
                    return (i, j);
                }
            }
        }

        return (-1, -1);
    }

    /// <summary>
    /// Adds the smallest uncovered value to every covered row and subtracts it from every
    /// uncovered column, which creates at least one new uncovered zero.
    /// </summary>
    /// <exception cref="InvalidOperationException">The smallest uncovered value is not finite.</exception>
    private static void AdjustMatrix(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        float min = float.PositiveInfinity;
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j])
                {
                    min = Math.Min(min, a[i, j]);
                }
            }
        }

        // An infinite or NaN minimum cannot produce a zero; the caller would loop forever.
        if (!float.IsFinite(min))
        {
            throw new InvalidOperationException("No assignment with finite total cost exists for the given cost matrix.");
        }

        for (int i = 0; i < n; i++)
        {
            if (!rowCovered[i])
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                a[i, j] += min;
            }
        }

        for (int j = 0; j < n; j++)
        {
            if (colCovered[j])
            {
                continue;
            }

            for (int i = 0; i < n; i++)
            {
                a[i, j] -= min;
            }
        }
    }

    /// <summary>True when row <paramref name="r"/> contains a star.</summary>
    private static bool RowHasStar(bool[,] starred, int r, int n) => FindStarInRow(starred, r, n) != -1;

    /// <summary>True when column <paramref name="c"/> contains a star.</summary>
    private static bool ColHasStar(bool[,] starred, int c, int n) => FindStarInCol(starred, c, n) != -1;

    /// <summary>Column of the first star in row <paramref name="r"/>, or -1.</summary>
    private static int FindStarInRow(bool[,] starred, int r, int n)
    {
        for (int j = 0; j < n; j++)
        {
            if (starred[r, j])
            {
                return j;
            }
        }

        return -1;
    }

    /// <summary>Row of the first star in column <paramref name="c"/>, or -1.</summary>
    private static int FindStarInCol(bool[,] starred, int c, int n)
    {
        for (int i = 0; i < n; i++)
        {
            if (starred[i, c])
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Column of the first primed cell in row <paramref name="r"/>, or -1.</summary>
    private static int FindPrimeInRow(bool[,] primed, int r, int n)
    {
        for (int j = 0; j < n; j++)
        {
            if (primed[r, j])
            {
                return j;
            }
        }

        return -1;
    }

    /// <summary>Number of true entries in <paramref name="a"/>.</summary>
    private static int CountTrue(bool[] a) => a.Count(x => x);
}
}