using System.Drawing;
using System.Numerics;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using PointF = SixLabors.ImageSharp.PointF;
using SessionOptions = Microsoft.ML.OnnxRuntime.SessionOptions;

namespace CaptchaBreaker
{
    /// <summary>
    /// Runs a two-model pipeline on an image: a YOLO session detects boxes, the boxes are split
    /// into "answer" boxes (top edge above <c>SplitY</c>) and "question" boxes (the rest), a
    /// Siamese session embeds a 96x96 crop of every box, and a minimum-cost assignment pairs each
    /// question box with an answer box. The result is the centre of the matched answer box for
    /// each question box, in left-to-right question order.
    /// </summary>
    public sealed class ChineseClick0 : IDisposable
    {
        // Side length of the square canvas fed to the detector.
        private const int Canvas = 384;

        // Side length of each crop fed to the Siamese model.
        private const int Patch = 96;

        // Detections with confidence at or below this value are discarded.
        private const float ConfThreshold = 0.5f;

        // Boxes whose top edge is above this y are answers; the others are questions.
        private const float SplitY = 344f;

        // Number of values per detection: x_min, y_min, x_max, y_max, confidence, class.
        private const int BoxFields = 6;

        private readonly InferenceSession _yolo;
        private readonly InferenceSession _siamese;

        // Non-null only when this instance created the options itself and therefore owns them.
        private readonly SessionOptions? _ownedOptions;

        /// <summary>Loads both ONNX models.</summary>
        /// <param name="yoloModelPath">Path of the detector model file.</param>
        /// <param name="siameseModelPath">Path of the feature-extractor model file.</param>
        /// <param name="options">
        /// Session options shared by both sessions. When null, default options are created and
        /// released again by <see cref="Dispose"/>; caller-supplied options stay owned by the caller.
        /// </param>
        /// <exception cref="FileNotFoundException">Either model file does not exist.</exception>
        public ChineseClick0(string yoloModelPath, string siameseModelPath, SessionOptions? options = null)
        {
            if (!File.Exists(yoloModelPath))
            {
                throw new FileNotFoundException("YOLO 模型未找到", yoloModelPath);
            }

            if (!File.Exists(siameseModelPath))
            {
                throw new FileNotFoundException("Siamese 模型未找到", siameseModelPath);
            }

            // Default options (CPU execution provider) are created here only when none were given.
            SessionOptions? owned = options is null ? new SessionOptions() : null;
            InferenceSession? yolo = null;
            try
            {
                SessionOptions effective = options ?? owned!;
                yolo = new InferenceSession(yoloModelPath, effective);
                _siamese = new InferenceSession(siameseModelPath, effective);
                _yolo = yolo;
                _ownedOptions = owned;
            }
            catch
            {
                // Do not leak native handles when the second session (or the first) fails to load.
                yolo?.Dispose();
                owned?.Dispose();
                throw;
            }
        }

        /// <summary>Loads the image at <paramref name="imagePath"/> and runs the pipeline on it.</summary>
        /// <param name="imagePath">Path of the image file to load.</param>
        /// <returns>See <see cref="Run(Image{Rgba32})"/>.</returns>
        public List<PointF> Run(string imagePath)
        {
            // Fully qualified so the non-generic Image cannot clash with System.Drawing.
            using var img = SixLabors.ImageSharp.Image.Load<Rgba32>(imagePath);
            return Run(img);
        }

        /// <summary>Runs detection, feature extraction and matching on an in-memory image.</summary>
        /// <param name="raw">Input image; neither side may exceed 384 pixels.</param>
        /// <returns>
        /// One point per question box (ordered left to right): the centre of the answer box it was
        /// matched with. Empty when no answer box or no question box was detected.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="raw"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The image is wider or taller than the canvas.</exception>
        public List<PointF> Run(Image<Rgba32> raw)
        {
            ArgumentNullException.ThrowIfNull(raw);

            if (raw.Width > Canvas || raw.Height > Canvas)
            {
                throw new InvalidOperationException("不能输入大于384长宽的图片!");
            }

            using var image = PreprocessToCanvas(raw);

            // 1. YOLO detection.
            var boxes = Detect(image);

            // 2. Separate answer boxes from question boxes, each sorted left to right.
            var (ans, ques) = SplitBoxes(boxes);
            if (ans.Count == 0 || ques.Count == 0)
            {
                return new List<PointF>();
            }

            // 3. Crop every box, resize to 96x96, and batch as [answers..., questions...].
            var (batch, dims) = CropAndResizeBatch(image, ans, ques);

            // 4. Siamese feature extraction.
            var feats = ExtractFeatures(batch, dims);

            // 5. Cost matrix with one row per question and one column per answer.
            var cost = BuildCostMatrix(feats, ans.Count);

            // 6. Minimum-cost assignment: entry i is the answer index chosen for question i.
            var assign = Hungarian.Solve(cost);

            // 7. Centres of the matched answer boxes, in question order.
            return GenerateResults(ans, assign);
        }

        /// <summary>Releases both inference sessions and any session options created by the constructor.</summary>
        public void Dispose()
        {
            _yolo.Dispose();
            _siamese.Dispose();
            _ownedOptions?.Dispose();
        }

        /// <summary>Pastes <paramref name="src"/> at the top-left corner of an opaque black 384x384 canvas.</summary>
        private static Image<Rgba32> PreprocessToCanvas(Image<Rgba32> src)
        {
            var canvas = new Image<Rgba32>(Canvas, Canvas, new Rgba32(0, 0, 0, 255));
            try
            {
                canvas.Mutate(ctx => ctx.DrawImage(src, new SixLabors.ImageSharp.Point(0, 0), 1f));
                return canvas;
            }
            catch
            {
                // The caller never receives the canvas on failure, so release it here.
                canvas.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Copies the top-left <paramref name="size"/> x <paramref name="size"/> pixels of
        /// <paramref name="image"/> into slot <paramref name="batchIndex"/> of an
        /// [N, 3, size, size] tensor as R, G, B planes scaled to 0..1.
        /// </summary>
        private static void WritePixels(Image<Rgba32> image, DenseTensor<float> tensor, int batchIndex, int size)
        {
            image.ProcessPixelRows(accessor =>
            {
                for (int py = 0; py < size; py++)
                {
                    var row = accessor.GetRowSpan(py);
                    for (int px = 0; px < size; px++)
                    {
                        Rgba32 pixel = row[px];
                        tensor[batchIndex, 0, py, px] = pixel.R / 255f;
                        tensor[batchIndex, 1, py, px] = pixel.G / 255f;
                        tensor[batchIndex, 2, py, px] = pixel.B / 255f;
                    }
                }
            });
        }

        /// <summary>Runs the detector on the canvas image and returns every box above the confidence threshold.</summary>
        /// <exception cref="NotSupportedException">The output tensor is neither [1, N, 6] nor [1, 6, N].</exception>
        private List<BBox> Detect(Image<Rgba32> img)
        {
            // Input tensor: float32 [1, 3, 384, 384], channel-first, values in 0..1.
            var input = new DenseTensor<float>(new[] { 1, 3, Canvas, Canvas });
            WritePixels(img, input, 0, Canvas);

            var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("images", input) };
            using var results = _yolo.Run(inputs);

            // Prefer the output named "output0"; otherwise fall back to the first output.
            var output = results.FirstOrDefault(r => r.Name == "output0") ?? results.First();
            var t = output.AsTensor<float>();
            var dims = t.Dimensions;

            // Accept [1, N, 6] (one detection per row) or [1, 6, N] (one detection per column).
            // A [1, 6, 6] tensor is read as [1, N, 6], as before.
            bool batchOfOne = dims.Length == 3 && dims[0] == 1;
            bool boxMajor = batchOfOne && dims[2] == BoxFields;
            bool fieldMajor = batchOfOne && !boxMajor && dims[1] == BoxFields;
            if (!boxMajor && !fieldMajor)
            {
                throw new NotSupportedException($"不支持的 YOLO 输出维度:[{string.Join(",", dims.ToArray())}]");
            }

            int count = boxMajor ? dims[1] : dims[2];
            float Field(int box, int field) => boxMajor ? t[0, box, field] : t[0, field, box];

            // Each detection is assumed to be (x_min, y_min, x_max, y_max, conf, cls).
            var list = new List<BBox>();
            for (int i = 0; i < count; i++)
            {
                float conf = Field(i, 4);
                if (conf <= ConfThreshold)
                {
                    continue;
                }

                list.Add(new BBox(Field(i, 0), Field(i, 1), Field(i, 2), Field(i, 3), conf, Field(i, 5)));
            }

            return list;
        }

        /// <summary>
        /// Sorts <paramref name="boxes"/> in place by left edge and splits them into answers
        /// (top edge above <c>SplitY</c>) and questions (everything else).
        /// </summary>
        private static (List<BBox> ans, List<BBox> ques) SplitBoxes(List<BBox> boxes)
        {
            boxes.Sort((a, b) => a.XMin.CompareTo(b.XMin));

            var ans = new List<BBox>();
            var ques = new List<BBox>();
            foreach (var box in boxes)
            {
                (box.YMin < SplitY ? ans : ques).Add(box);
            }

            return (ans, ques);
        }

        /// <summary>
        /// Crops every box out of <paramref name="img"/>, resizes each crop to 96x96 and stacks
        /// them into one [total, 3, 96, 96] tensor: answers first, then questions.
        /// </summary>
        /// <returns>The batch tensor together with its dimensions.</returns>
        private static (DenseTensor<float> batch, int[] dims) CropAndResizeBatch(
            Image<Rgba32> img, List<BBox> ans, List<BBox> ques)
        {
            int total = ans.Count + ques.Count;
            var tensor = new DenseTensor<float>(new[] { total, 3, Patch, Patch });

            void WritePatch(int index, BBox box)
            {
                var rect = ToSafeRect(box, img.Width, img.Height);
                using var cropped = img.Clone(ctx => ctx.Crop(rect).Resize(Patch, Patch, KnownResamplers.Lanczos3));
                WritePixels(cropped, tensor, index, Patch);
            }

            int next = 0;
            foreach (var box in ans)
            {
                WritePatch(next++, box);
            }

            foreach (var box in ques)
            {
                WritePatch(next++, box);
            }

            return (tensor, new[] { total, 3, Patch, Patch });
        }

        /// <summary>Runs the Siamese model on the batch and copies its [batch, feat] output into a 2-D array.</summary>
        /// <exception cref="NotSupportedException">The output is not 2-D or its first dimension differs from the batch size.</exception>
        private float[,] ExtractFeatures(DenseTensor<float> batch, int[] dims)
        {
            var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("input", batch) };
            using var results = _siamese.Run(inputs);

            // Prefer the output named "output"; otherwise fall back to the first output.
            var output = results.FirstOrDefault(r => r.Name == "output") ?? results.First();
            var t = output.AsTensor<float>();

            // Expected shape: [batch, feat].
            if (t.Dimensions.Length != 2 || t.Dimensions[0] != dims[0])
            {
                throw new NotSupportedException($"不支持的 Siamese 输出维度:[{string.Join(",", t.Dimensions.ToArray())}]");
            }

            int rows = t.Dimensions[0];
            int cols = t.Dimensions[1];
            var feats = new float[rows, cols];
            for (int i = 0; i < rows; i++)
            {
                for (int j = 0; j < cols; j++)
                {
                    feats[i, j] = t[i, j];
                }
            }

            return feats;
        }

        /// <summary>
        /// Builds the question x answer matrix of Euclidean distances between feature rows.
        /// The first <paramref name="ansCount"/> rows of <paramref name="feats"/> are answers;
        /// the remaining rows are questions.
        /// </summary>
        private static float[,] BuildCostMatrix(float[,] feats, int ansCount)
        {
            int featDim = feats.GetLength(1);
            int quesCount = feats.GetLength(0) - ansCount;

            var cost = new float[quesCount, ansCount];
            for (int q = 0; q < quesCount; q++)
            {
                for (int a = 0; a < ansCount; a++)
                {
                    float sum = 0f;
                    for (int k = 0; k < featDim; k++)
                    {
                        float d = feats[ansCount + q, k] - feats[a, k];
                        sum += d * d;
                    }

                    cost[q, a] = MathF.Sqrt(sum);
                }
            }

            return cost;
        }

        /// <summary>Maps every assigned answer index to the centre point of that answer box.</summary>
        private static List<PointF> GenerateResults(List<BBox> ans, int[] assign)
        {
            var res = new List<PointF>(assign.Length);
            foreach (int answerIndex in assign)
            {
                var box = ans[answerIndex];
                res.Add(new PointF((box.XMin + box.XMax) / 2f, (box.YMin + box.YMax) / 2f));
            }

            return res;
        }

        /// <summary>
        /// Converts a box to an integer rectangle that lies inside a <paramref name="w"/> x
        /// <paramref name="h"/> image and is at least one pixel wide and high.
        /// </summary>
        private static SixLabors.ImageSharp.Rectangle ToSafeRect(BBox b, int w, int h)
        {
            int x = Math.Clamp((int)MathF.Floor(b.XMin), 0, w - 1);
            int y = Math.Clamp((int)MathF.Floor(b.YMin), 0, h - 1);
            int rw = Math.Clamp((int)MathF.Ceiling(b.XMax - b.XMin), 1, w - x);
            int rh = Math.Clamp((int)MathF.Ceiling(b.YMax - b.YMin), 1, h - y);
            return new SixLabors.ImageSharp.Rectangle(x, y, rw, rh);
        }

        // One detection as read from the detector output.
        private readonly record struct BBox(float XMin, float YMin, float XMax, float YMax, float Confidence, float Class);
    }

    /// <summary>
    /// Minimum-cost assignment (Hungarian / Munkres algorithm) on a rows x columns cost matrix.
    /// </summary>
    public static class Hungarian
    {
        /// <summary>Finds a minimum-cost assignment of rows to columns.</summary>
        /// <param name="cost">Cost matrix; every entry must be finite.</param>
        /// <returns>
        /// An array with one entry per row holding the column assigned to that row. The matrix is
        /// padded with zero-cost cells to a square; a row that ends up matched to a padding column
        /// (only possible when there are more rows than columns) reports
        /// <c>Math.Min(row, columns - 1)</c> instead, which is -1 when the matrix has no columns.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="cost"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="cost"/> contains NaN or an infinity, for which the algorithm would never terminate.
        /// </exception>
        public static int[] Solve(float[,] cost)
        {
            ArgumentNullException.ThrowIfNull(cost);

            foreach (float value in cost)
            {
                if (!float.IsFinite(value))
                {
                    throw new ArgumentException("代价矩阵包含非有限值(NaN 或无穷大)", nameof(cost));
                }
            }

            int nRows = cost.GetLength(0);
            int nCols = cost.GetLength(1);

            // Pad to a square matrix; padding cells keep the default cost of 0.
            int n = Math.Max(nRows, nCols);
            var a = new float[n, n];
            for (int i = 0; i < nRows; i++)
            {
                for (int j = 0; j < nCols; j++)
                {
                    a[i, j] = cost[i, j];
                }
            }

            // Subtract each row's minimum from that row.
            for (int i = 0; i < n; i++)
            {
                float min = float.PositiveInfinity;
                for (int j = 0; j < n; j++)
                {
                    min = Math.Min(min, a[i, j]);
                }

                for (int j = 0; j < n; j++)
                {
                    a[i, j] -= min;
                }
            }

            // Subtract each column's minimum from that column.
            for (int j = 0; j < n; j++)
            {
                float min = float.PositiveInfinity;
                for (int i = 0; i < n; i++)
                {
                    min = Math.Min(min, a[i, j]);
                }

                for (int i = 0; i < n; i++)
                {
                    a[i, j] -= min;
                }
            }

            var starred = new bool[n, n];
            var primed = new bool[n, n];
            var rowCovered = new bool[n];
            var colCovered = new bool[n];

            // Initial matching: star the first zero of each row whose column is still free.
            // Comparing with exact 0 is intentional: zeros arise from x - x, which is exact.
            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    if (a[i, j] == 0 && !ColHasStar(starred, j, n))
                    {
                        starred[i, j] = true;
                        break;
                    }
                }
            }

            CoverStarredColumns(starred, colCovered, n);

            // The matching is complete once every column holds a star.
            while (CountTrue(colCovered) < n)
            {
                (int r, int c) = FindZero(a, rowCovered, colCovered, n);
                while (r == -1)
                {
                    // No uncovered zero left: shift values until one appears.
                    AdjustMatrix(a, rowCovered, colCovered, n);
                    (r, c) = FindZero(a, rowCovered, colCovered, n);
                }

                primed[r, c] = true;

                int starCol = FindStarInRow(starred, r, n);
                if (starCol != -1)
                {
                    // The row is already matched: cover it and free the column of its star.
                    rowCovered[r] = true;
                    colCovered[starCol] = false;
                    continue;
                }

                // Augmenting path starting at the primed zero: alternately follow the star in the
                // current column and the prime in that star's row until a column has no star.
                var path = new List<(int Row, int Col)> { (r, c) };
                int col = c;
                while (true)
                {
                    int row = FindStarInCol(starred, col, n);
                    if (row == -1)
                    {
                        break;
                    }

                    path.Add((row, col));
                    col = FindPrimeInRow(primed, row, n);
                    path.Add((row, col));
                }

                // Flip the path: stars become plain zeros, primes become stars.
                foreach (var (pathRow, pathCol) in path)
                {
                    starred[pathRow, pathCol] = !starred[pathRow, pathCol];
                }

                // Reset primes and covers, then cover the columns of the enlarged matching.
                Array.Clear(rowCovered, 0, n);
                Array.Clear(primed, 0, primed.Length);
                CoverStarredColumns(starred, colCovered, n);
            }

            // Report the starred column of every real row.
            var rowsol = new int[nRows];
            for (int i = 0; i < nRows; i++)
            {
                int j = FindStarInRow(starred, i, n);
                rowsol[i] = (j < nCols) ? j : Math.Min(i, nCols - 1);
            }

            return rowsol;
        }

        // Marks exactly the columns that contain a star as covered.
        private static void CoverStarredColumns(bool[,] starred, bool[] colCovered, int n)
        {
            Array.Clear(colCovered, 0, colCovered.Length);
            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    if (starred[i, j])
                    {
                        colCovered[j] = true;
                    }
                }
            }
        }

        // Returns the first zero (row-major order) in an uncovered row and column, or (-1, -1).
        private static (int, int) FindZero(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
        {
            for (int i = 0; i < n; i++)
            {
                if (rowCovered[i])
                {
                    continue;
                }

                for (int j = 0; j < n; j++)
                {
                    if (!colCovered[j] && a[i, j] == 0)
                    {
                        return (i, j);
                    }
                }
            }

            return (-1, -1);
        }

        // Adds the smallest uncovered value to every covered row and subtracts it from every
        // uncovered column, which creates at least one new uncovered zero.
        private static void AdjustMatrix(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
        {
            float min = float.PositiveInfinity;
            for (int i = 0; i < n; i++)
            {
                if (rowCovered[i])
                {
                    continue;
                }

                for (int j = 0; j < n; j++)
                {
                    if (!colCovered[j])
                    {
                        min = Math.Min(min, a[i, j]);
                    }
                }
            }

            for (int i = 0; i < n; i++)
            {
                if (!rowCovered[i])
                {
                    continue;
                }

                for (int j = 0; j < n; j++)
                {
                    a[i, j] += min;
                }
            }

            for (int j = 0; j < n; j++)
            {
                if (colCovered[j])
                {
                    continue;
                }

                for (int i = 0; i < n; i++)
                {
                    a[i, j] -= min;
                }
            }
        }

        // True when column c contains a star.
        private static bool ColHasStar(bool[,] starred, int c, int n) => FindStarInCol(starred, c, n) != -1;

        // Column of the star in row r, or -1.
        private static int FindStarInRow(bool[,] starred, int r, int n)
        {
            for (int j = 0; j < n; j++)
            {
                if (starred[r, j])
                {
                    return j;
                }
            }

            return -1;
        }

        // Row of the star in column c, or -1.
        private static int FindStarInCol(bool[,] starred, int c, int n)
        {
            for (int i = 0; i < n; i++)
            {
                if (starred[i, c])
                {
                    return i;
                }
            }

            return -1;
        }

        // Column of the first prime in row r, or -1.
        private static int FindPrimeInRow(bool[,] primed, int r, int n)
        {
            for (int j = 0; j < n; j++)
            {
                if (primed[r, j])
                {
                    return j;
                }
            }

            return -1;
        }

        // Number of true entries; a plain loop avoids a LINQ closure allocation on every iteration.
        private static int CountTrue(bool[] flags)
        {
            int count = 0;
            foreach (bool flag in flags)
            {
                if (flag)
                {
                    count++;
                }
            }

            return count;
        }
    }
}