// Mirror of https://github.com/Megghy/MegghysAPI.git
// Synced 2025-12-06 14:16:56 +08:00 · 453 lines · 16 KiB · C#
using Microsoft.ML.OnnxRuntime;
|
||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||
using SixLabors.ImageSharp;
|
||
using SixLabors.ImageSharp.PixelFormats;
|
||
using SixLabors.ImageSharp.Processing;
|
||
using System.Numerics;
|
||
using System.Drawing;
|
||
using SessionOptions = Microsoft.ML.OnnxRuntime.SessionOptions;
|
||
using PointF = SixLabors.ImageSharp.PointF;
|
||
|
||
namespace CaptchaBreaker
|
||
{
|
||
/// <summary>
/// Captcha solver that pairs "question" glyphs with "answer" glyphs found in one image.
/// A YOLO model detects glyph boxes on a fixed-size canvas, a Siamese model embeds a crop of
/// each box, and a minimum-cost assignment maps every question box to an answer box.
/// </summary>
public sealed class ChineseClick0 : IDisposable
{
    /// <summary>Side length (pixels) of the square canvas fed to the detector.</summary>
    private const int Canvas = 384;

    /// <summary>Side length (pixels) of each crop fed to the Siamese model.</summary>
    private const int Patch = 96;

    /// <summary>Detections whose confidence is not strictly above this value are dropped.</summary>
    private const float ConfThreshold = 0.5f;

    /// <summary>Boxes whose top edge is above this Y are answers; the rest are questions.</summary>
    private const float SplitY = 344f;

    private readonly InferenceSession _yolo;
    private readonly InferenceSession _siamese;

    // Non-null only when the constructor had to create the options itself; in that case this
    // instance owns them and releases them in Dispose. Caller-supplied options are never disposed.
    private readonly SessionOptions? _ownedOptions;

    private bool _disposed;

    /// <summary>Loads both ONNX models.</summary>
    /// <param name="yoloModelPath">Path of the detection model file.</param>
    /// <param name="siameseModelPath">Path of the embedding model file.</param>
    /// <param name="options">
    /// Session options shared by both sessions. When null, default options are created
    /// (CPU execution provider) and owned by this instance.
    /// </param>
    /// <exception cref="FileNotFoundException">Either model file does not exist.</exception>
    public ChineseClick0(string yoloModelPath, string siameseModelPath, SessionOptions? options = null)
    {
        if (!File.Exists(yoloModelPath))
        {
            throw new FileNotFoundException("YOLO 模型未找到", yoloModelPath);
        }

        if (!File.Exists(siameseModelPath))
        {
            throw new FileNotFoundException("Siamese 模型未找到", siameseModelPath);
        }

        SessionOptions? owned = null;
        if (options is null)
        {
            owned = new SessionOptions(); // default: CPU execution provider
            options = owned;
        }

        InferenceSession? yolo = null;
        InferenceSession? siamese = null;
        try
        {
            yolo = new InferenceSession(yoloModelPath, options);
            siamese = new InferenceSession(siameseModelPath, options);
        }
        catch
        {
            // Do not leak native handles when the second session (or the first) fails to load.
            yolo?.Dispose();
            owned?.Dispose();
            throw;
        }

        _yolo = yolo;
        _siamese = siamese;
        _ownedOptions = owned;
    }

    /// <summary>Loads the image at <paramref name="imagePath"/> and solves it.</summary>
    /// <param name="imagePath">Path of the captcha image.</param>
    /// <returns>See <see cref="Run(Image{Rgba32})"/>.</returns>
    public List<PointF> Run(string imagePath)
    {
        using var img = Image.Load<Rgba32>(imagePath);
        return Run(img);
    }

    /// <summary>Solves a captcha image.</summary>
    /// <param name="raw">Image no larger than 384x384; it is not modified.</param>
    /// <returns>
    /// One point per detected question box, in left-to-right question order: the centre of the
    /// answer box matched to that question. The image is placed at the canvas origin without
    /// scaling, so the points are also valid in <paramref name="raw"/>'s coordinates.
    /// Empty when no answer box or no question box was detected.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="raw"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The image exceeds 384 pixels in width or height.</exception>
    public List<PointF> Run(Image<Rgba32> raw)
    {
        if (raw is null)
        {
            throw new ArgumentNullException(nameof(raw));
        }

        if (raw.Width > Canvas || raw.Height > Canvas)
        {
            throw new InvalidOperationException("不能输入大于384长宽的图片!");
        }

        using var image = PreprocessToCanvas(raw);

        // 1. YOLO detection.
        var boxes = Detect(image);

        // 2. Separate answer boxes from question boxes, each ordered left to right.
        var (ans, ques) = SplitBoxes(boxes);
        if (ans.Count == 0 || ques.Count == 0)
        {
            return new List<PointF>();
        }

        // 3. Crop every box, resize to 96x96 and stack as a batch ordered [answers..., questions...].
        var (batch, dims) = CropAndResizeBatch(image, ans, ques);

        // 4. Siamese feature extraction.
        var feats = ExtractFeatures(batch, dims);

        // 5. Cost matrix: rows are questions, columns are answers.
        var cost = BuildCostMatrix(feats, ans.Count);

        // 6. Minimum-cost assignment: one answer index per question.
        var assign = Hungarian.Solve(cost);

        // 7. Centres of the matched answer boxes, in question order.
        return GenerateResults(ans, assign);
    }

    /// <summary>
    /// Draws <paramref name="src"/> at the top-left corner of a new opaque black 384x384 canvas.
    /// The caller owns (and must dispose) the returned image.
    /// </summary>
    private static Image<Rgba32> PreprocessToCanvas(Image<Rgba32> src)
    {
        var canvas = new Image<Rgba32>(Canvas, Canvas, new Rgba32(0, 0, 0, 255));
        canvas.Mutate(ctx => ctx.DrawImage(src, new SixLabors.ImageSharp.Point(0, 0), 1f));
        return canvas;
    }

    /// <summary>
    /// Runs the detector on the canvas image and returns every box whose confidence is above
    /// <see cref="ConfThreshold"/>.
    /// </summary>
    /// <exception cref="NotSupportedException">The output tensor is neither [1,N,6] nor [1,6,N].</exception>
    private List<BBox> Detect(Image<Rgba32> img)
    {
        // Channel-first float32 input [1,3,384,384] scaled to 0..1; alpha is ignored.
        var input = new DenseTensor<float>(new[] { 1, 3, Canvas, Canvas });
        img.ProcessPixelRows(accessor =>
        {
            for (int py = 0; py < Canvas; py++)
            {
                var row = accessor.GetRowSpan(py);
                for (int px = 0; px < Canvas; px++)
                {
                    var pixel = row[px];
                    input[0, 0, py, px] = pixel.R / 255f;
                    input[0, 1, py, px] = pixel.G / 255f;
                    input[0, 2, py, px] = pixel.B / 255f;
                }
            }
        });

        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("images", input)
        };

        using var results = _yolo.Run(inputs);

        // Prefer the output named "output0"; otherwise fall back to the first output.
        var output = results.FirstOrDefault(r => r.Name == "output0") ?? results.First();
        var tensor = output.AsTensor<float>();
        var dims = tensor.Dimensions;

        // Accept [1,N,6] or [1,6,N]; the [1,N,6] interpretation wins when both would match.
        bool boxMajor = dims.Length == 3 && dims[0] == 1 && dims[2] == 6;
        bool fieldMajor = !boxMajor && dims.Length == 3 && dims[0] == 1 && dims[1] == 6;
        if (!boxMajor && !fieldMajor)
        {
            throw new NotSupportedException($"不支持的 YOLO 输出维度:[{string.Join(",", dims.ToArray())}]");
        }

        int count = boxMajor ? dims[1] : dims[2];

        // Reads one field of one detection regardless of the tensor layout.
        float Field(int box, int field) => boxMajor ? tensor[0, box, field] : tensor[0, field, box];

        // The six fields are assumed to be (x_min, y_min, x_max, y_max, conf, cls).
        var list = new List<BBox>();
        for (int i = 0; i < count; i++)
        {
            float conf = Field(i, 4);
            if (conf <= ConfThreshold)
            {
                continue;
            }

            list.Add(new BBox(Field(i, 0), Field(i, 1), Field(i, 2), Field(i, 3), conf, Field(i, 5)));
        }

        return list;
    }

    /// <summary>
    /// Sorts <paramref name="boxes"/> in place by left edge, then partitions them: boxes whose
    /// top edge is above <see cref="SplitY"/> are answers, the others are questions.
    /// </summary>
    private static (List<BBox> ans, List<BBox> ques) SplitBoxes(List<BBox> boxes)
    {
        boxes.Sort((a, b) => a.XMin.CompareTo(b.XMin)); // left to right

        var ans = new List<BBox>();
        var ques = new List<BBox>();
        foreach (var b in boxes)
        {
            if (b.YMin < SplitY)
            {
                ans.Add(b);
            }
            else
            {
                ques.Add(b);
            }
        }

        return (ans, ques);
    }

    /// <summary>
    /// Crops every box from <paramref name="img"/>, resizes the crop to 96x96 (Lanczos3) and
    /// writes it as channel-first RGB in 0..1 into one batch tensor, answers first, then questions.
    /// </summary>
    /// <returns>The batch tensor and its shape [answers + questions, 3, 96, 96].</returns>
    private static (DenseTensor<float> batch, int[] dims) CropAndResizeBatch(Image<Rgba32> img, List<BBox> ans, List<BBox> ques)
    {
        int a = ans.Count, q = ques.Count, total = a + q;
        var tensor = new DenseTensor<float>(new[] { total, 3, Patch, Patch });

        // Fills batch slot `index` with the resized crop of box `b`.
        void ProcessOne(int index, BBox b)
        {
            var rect = ToSafeRect(b, img.Width, img.Height);
            using var cropped = img.Clone(ctx => ctx.Crop(rect).Resize(Patch, Patch, KnownResamplers.Lanczos3));

            cropped.ProcessPixelRows(rows =>
            {
                for (int y = 0; y < Patch; y++)
                {
                    var row = rows.GetRowSpan(y);
                    for (int x = 0; x < Patch; x++)
                    {
                        var p = row[x];
                        tensor[index, 0, y, x] = p.R / 255f;
                        tensor[index, 1, y, x] = p.G / 255f;
                        tensor[index, 2, y, x] = p.B / 255f;
                    }
                }
            });
        }

        for (int i = 0; i < a; i++)
        {
            ProcessOne(i, ans[i]);
        }

        for (int i = 0; i < q; i++)
        {
            ProcessOne(a + i, ques[i]);
        }

        return (tensor, new[] { total, 3, Patch, Patch });
    }

    /// <summary>
    /// Runs the Siamese model on the batch and copies its 2-D output into a managed array
    /// (one row per batch item).
    /// </summary>
    /// <param name="batch">Input batch.</param>
    /// <param name="dims">Shape of <paramref name="batch"/>; only the batch size (element 0) is checked.</param>
    /// <exception cref="NotSupportedException">
    /// The output is not 2-D or its first dimension differs from the batch size.
    /// </exception>
    private float[,] ExtractFeatures(DenseTensor<float> batch, int[] dims)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("input", batch)
        };

        using var results = _siamese.Run(inputs);

        // Prefer the output named "output"; otherwise fall back to the first output.
        var output = results.FirstOrDefault(r => r.Name == "output") ?? results.First();
        var t = output.AsTensor<float>();

        // Expected shape: [batch, feature].
        if (t.Dimensions.Length != 2 || t.Dimensions[0] != dims[0])
        {
            throw new NotSupportedException($"不支持的 Siamese 输出维度:[{string.Join(",", t.Dimensions.ToArray())}]");
        }

        int rows = t.Dimensions[0];
        int cols = t.Dimensions[1];

        // Copy out before `results` is disposed at the end of this method.
        var feats = new float[rows, cols];
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                feats[i, j] = t[i, j];
            }
        }

        return feats;
    }

    /// <summary>
    /// Builds the question-by-answer cost matrix: entry [i, j] is the Euclidean distance between
    /// the feature row of question i and the feature row of answer j.
    /// </summary>
    /// <param name="feats">Feature rows ordered [answers..., questions...].</param>
    /// <param name="ansCount">Number of leading rows that belong to answers.</param>
    private static float[,] BuildCostMatrix(float[,] feats, int ansCount)
    {
        int total = feats.GetLength(0);
        int featDim = feats.GetLength(1);
        int q = total - ansCount;
        int a = ansCount;

        var cost = new float[q, a];
        for (int i = 0; i < q; i++)
        {
            for (int j = 0; j < a; j++)
            {
                float sum = 0f;
                for (int k = 0; k < featDim; k++)
                {
                    float d = feats[ansCount + i, k] - feats[j, k]; // question - answer
                    sum += d * d;
                }

                cost[i, j] = MathF.Sqrt(sum);
            }
        }

        return cost;
    }

    /// <summary>
    /// Maps each assigned answer index to the centre point of that answer box, keeping the order
    /// of <paramref name="assign"/>.
    /// </summary>
    private static List<PointF> GenerateResults(List<BBox> ans, int[] assign)
    {
        var res = new List<PointF>(assign.Length);
        foreach (var aIdx in assign)
        {
            var b = ans[aIdx];
            res.Add(new PointF((b.XMin + b.XMax) / 2f, (b.YMin + b.YMax) / 2f));
        }

        return res;
    }

    /// <summary>
    /// Converts a box to an integer rectangle that lies inside a <paramref name="w"/> x
    /// <paramref name="h"/> image and is at least 1x1.
    /// </summary>
    private static SixLabors.ImageSharp.Rectangle ToSafeRect(BBox b, int w, int h)
    {
        // Origin: floor of the top-left corner, clamped to a valid pixel.
        int x = Math.Clamp((int)MathF.Floor(b.XMin), 0, w - 1);
        int y = Math.Clamp((int)MathF.Floor(b.YMin), 0, h - 1);

        // Size: ceiling of the box extent, clamped so the rectangle stays inside the image.
        int rw = Math.Clamp((int)MathF.Ceiling(b.XMax - b.XMin), 1, w - x);
        int rh = Math.Clamp((int)MathF.Ceiling(b.YMax - b.YMin), 1, h - y);

        return new SixLabors.ImageSharp.Rectangle(x, y, rw, rh);
    }

    /// <summary>
    /// Releases both inference sessions and, if this instance created them, the session options.
    /// Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _yolo.Dispose();
        _siamese.Dispose();
        _ownedOptions?.Dispose();
    }

    /// <summary>One detection: corner coordinates, confidence and class value as read from the detector output.</summary>
    private readonly record struct BBox(float XMin, float YMin, float XMax, float YMax, float Confidence, float Class);
}
|
||
/// <summary>
/// Minimum-cost assignment (Hungarian / Munkres algorithm). The input is a rows-by-columns cost
/// matrix (questions by answers in this file); the result has one entry per row holding the
/// column index assigned to that row.
/// </summary>
public static class Hungarian
{
    /// <summary>Computes a minimum-cost assignment of rows to columns.</summary>
    /// <param name="cost">Cost matrix; every entry must be a finite number.</param>
    /// <returns>
    /// An array with one entry per row of <paramref name="cost"/>. Entry i is the column assigned
    /// to row i. When there are more rows than columns, the rows that end up matched to a padding
    /// column receive <c>Math.Min(i, columns - 1)</c> instead, so a column can appear more than
    /// once (and the value is -1 when the matrix has no columns at all).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="cost"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="cost"/> contains NaN or an infinity. Such values would leave the reduced
    /// matrix without any zero and the search below would never terminate.
    /// </exception>
    public static int[] Solve(float[,] cost)
    {
        if (cost is null)
        {
            throw new ArgumentNullException(nameof(cost));
        }

        foreach (float value in cost)
        {
            if (float.IsNaN(value) || float.IsInfinity(value))
            {
                throw new ArgumentException("Cost matrix entries must be finite numbers.", nameof(cost));
            }
        }

        int nRows = cost.GetLength(0);
        int nCols = cost.GetLength(1);

        // Pad to a square matrix; padding cells keep the default cost 0.
        int n = Math.Max(nRows, nCols);
        var a = new float[n, n];
        for (int i = 0; i < nRows; i++)
        {
            for (int j = 0; j < nCols; j++)
            {
                a[i, j] = cost[i, j];
            }
        }

        // Row reduction: subtract each row's minimum.
        for (int i = 0; i < n; i++)
        {
            float min = float.PositiveInfinity;
            for (int j = 0; j < n; j++) min = Math.Min(min, a[i, j]);
            for (int j = 0; j < n; j++) a[i, j] -= min;
        }

        // Column reduction: subtract each column's minimum.
        for (int j = 0; j < n; j++)
        {
            float min = float.PositiveInfinity;
            for (int i = 0; i < n; i++) min = Math.Min(min, a[i, j]);
            for (int i = 0; i < n; i++) a[i, j] -= min;
        }

        var starred = new bool[n, n];
        var primed = new bool[n, n];
        var rowCovered = new bool[n];
        var colCovered = new bool[n];

        // Initial matching: in each row, star the first zero whose row and column hold no star yet.
        // Zeros are produced by subtracting a value from itself, so the exact comparison with 0
        // is reliable for them.
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (a[i, j] == 0 && !RowHasStar(starred, i, n) && !ColHasStar(starred, j, n))
                {
                    starred[i, j] = true;
                    break;
                }
            }
        }

        CoverStarredColumns(starred, colCovered, n);

        // Every column covered means there are n stars, i.e. a complete assignment.
        while (CountTrue(colCovered) < n)
        {
            // Find an uncovered zero, creating one by adjusting the matrix when none exists.
            (int r, int c) = FindZero(a, rowCovered, colCovered, n);
            while (r == -1)
            {
                AdjustMatrix(a, rowCovered, colCovered, n);
                (r, c) = FindZero(a, rowCovered, colCovered, n);
            }

            primed[r, c] = true;

            int starCol = FindStarInRow(starred, r, n);
            if (starCol != -1)
            {
                // The row already has a star: cover the row, uncover the star's column, keep searching.
                rowCovered[r] = true;
                colCovered[starCol] = false;
                continue;
            }

            // No star in this row: build the alternating path that starts at this primed zero
            // (primed zero -> star in its column -> primed zero in that star's row -> ...).
            var path = new List<(int r, int c)> { (r, c) };
            int col = c;
            while (true)
            {
                int row = FindStarInCol(starred, col, n);
                if (row == -1)
                {
                    break;
                }

                path.Add((row, col));

                col = FindPrimeInRow(primed, row, n);
                path.Add((row, col));
            }

            // Flip the path: stars become plain zeros, primed zeros become stars (one more star overall).
            foreach (var (rr, cc) in path)
            {
                starred[rr, cc] = !starred[rr, cc];
            }

            // Reset primes and covers, then cover the columns of the new star set.
            Array.Clear(rowCovered, 0, n);
            Array.Clear(colCovered, 0, n);
            Array.Clear(primed, 0, primed.Length);

            CoverStarredColumns(starred, colCovered, n);
        }

        // Read the assignment of the real rows; stars in padding columns use the fallback index.
        var rowsol = new int[nRows];
        for (int i = 0; i < nRows; i++)
        {
            int j = FindStarInRow(starred, i, n);
            rowsol[i] = (j < nCols) ? j : Math.Min(i, nCols - 1);
        }

        return rowsol;
    }

    /// <summary>Clears all column covers, then covers every column that contains a star.</summary>
    private static void CoverStarredColumns(bool[,] starred, bool[] colCovered, int n)
    {
        Array.Clear(colCovered, 0, colCovered.Length);
        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (starred[i, j]) colCovered[j] = true;
            }
        }
    }

    /// <summary>
    /// Returns the first zero (scanning rows, then columns) whose row and column are both
    /// uncovered, or (-1, -1) when there is none.
    /// </summary>
    private static (int, int) FindZero(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i]) continue;

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j] && a[i, j] == 0)
                {
                    return (i, j);
                }
            }
        }

        return (-1, -1);
    }

    /// <summary>
    /// Takes the smallest value among cells whose row and column are both uncovered, adds it to
    /// every covered row and subtracts it from every uncovered column. This turns at least one
    /// uncovered cell into a zero.
    /// </summary>
    private static void AdjustMatrix(float[,] a, bool[] rowCovered, bool[] colCovered, int n)
    {
        float min = float.PositiveInfinity;
        for (int i = 0; i < n; i++)
        {
            if (rowCovered[i]) continue;

            for (int j = 0; j < n; j++)
            {
                if (!colCovered[j]) min = Math.Min(min, a[i, j]);
            }
        }

        for (int i = 0; i < n; i++)
        {
            if (!rowCovered[i]) continue;

            for (int j = 0; j < n; j++) a[i, j] += min;
        }

        for (int j = 0; j < n; j++)
        {
            if (colCovered[j]) continue;

            for (int i = 0; i < n; i++) a[i, j] -= min;
        }
    }

    /// <summary>True when row <paramref name="r"/> contains a star.</summary>
    private static bool RowHasStar(bool[,] starred, int r, int n)
    {
        for (int j = 0; j < n; j++) if (starred[r, j]) return true;
        return false;
    }

    /// <summary>True when column <paramref name="c"/> contains a star.</summary>
    private static bool ColHasStar(bool[,] starred, int c, int n)
    {
        for (int i = 0; i < n; i++) if (starred[i, c]) return true;
        return false;
    }

    /// <summary>Column of the first star in row <paramref name="r"/>, or -1.</summary>
    private static int FindStarInRow(bool[,] starred, int r, int n)
    {
        for (int j = 0; j < n; j++) if (starred[r, j]) return j;
        return -1;
    }

    /// <summary>Row of the first star in column <paramref name="c"/>, or -1.</summary>
    private static int FindStarInCol(bool[,] starred, int c, int n)
    {
        for (int i = 0; i < n; i++) if (starred[i, c]) return i;
        return -1;
    }

    /// <summary>Column of the first primed cell in row <paramref name="r"/>, or -1.</summary>
    private static int FindPrimeInRow(bool[,] primed, int r, int n)
    {
        for (int j = 0; j < n; j++) if (primed[r, j]) return j;
        return -1;
    }

    /// <summary>Number of true entries in <paramref name="a"/>.</summary>
    private static int CountTrue(bool[] a) => a.Count(x => x);
}
|
||
} |