|
- // Path of the image to recognize; this can be an image extracted from a PDF.
- string imagePath = "C:\\Users\\Administrator\\Desktop\\111\\22222.jpg";
- // Language codes: eng = English, chi_sim = Simplified Chinese.
- // The first argument is the folder that holds the *.traineddata language files.
- using (var engine = new TesseractEngine("./tessdata", "chi_sim", EngineMode.Default))
- // Dispose the Bitmap explicitly: it owns a GDI+ handle and keeps the image file locked until it is released.
- using (var bitmap = new Bitmap(imagePath))
- using (var img = PixConverter.ToPix(bitmap))
- using (var page = engine.Process(img))
- {
- // Run recognition and print the mean confidence followed by the recognized text.
- var text = page.GetText();
- Console.WriteLine("Mean confidence: {0}", page.GetMeanConfidence());
- Console.WriteLine("Text (GetIterator):");
- Console.WriteLine(text);
- }
复制代码
语言数据文件下载地址:https://github.com/tesseract-ocr/tessdata
下载后的中文语言文件名为:chi_sim.traineddata
将下载的 chi_sim.traineddata 文件放到 tessdata 目录下,例如 D:\Program Files\Tesseract-OCR\tessdata;该目录必须与代码中传给 TesseractEngine 的第一个参数一致(上面的示例为 "./tessdata",即程序工作目录下的 tessdata 文件夹)。
|
|