using
System;
using
System.Collections.Generic;
using
System.Linq;
using
System.Text;
using
System.Data;
using
System.Drawing;
using
System.IO;
using
System.Drawing.Imaging;
using
MODI; //Microsoft Office Document Imaging
// Install this component from the Office installation media first: a default Office setup does not include it, so the feature has to be added explicitly. Installation instructions: http://support.microsoft.com/kb/982760
// COM reference: Microsoft Office Document Imaging 12.0 Type Library (Office 2007)
// or Microsoft Office Document Imaging 11.0 Type Library (Office 2003)
// Simplified Chinese OCR engine: http://www.microsoft.com/downloads/thankyou.aspx?familyId=dd172063-9517-41d8-82af-29c38f7437b6&displayLang=zh-hk
namespace
ToText
{
/// <summary>
/// Optical Character Recognition (OCR) helpers.
/// 20140507 Geovin Du
/// 涂聚文
/// </summary>
public
static class OCRGetstring
{
/// <summary>
/// Builds the table of languages that can be offered for OCR.
/// </summary>
/// <returns>
/// A <see cref="DataTable"/> with the columns <c>ID</c> (int), <c>LanguageName</c> (string)
/// and <c>LanguageLCID</c> (string), containing one row per language.
/// </returns>
public static DataTable getLanguageList()
{
    // Display name / LCID pairs in display order; the row ID is the 1-based position.
    string[,] languages =
    {
        { "简体中文", "2052" },
        { "繁体中文", "1028" },
        { "英语", "9" },
        { "捷克语", "5" },
        { "丹麦语", "6" },
        { "德语", "7" },
        { "希腊语", "8" },
        { "西班牙语", "10" },
        { "芬兰语", "11" },
        { "法语", "12" },
        { "匈牙利语", "14" },
        { "意大利语", "16" },
        { "日语", "17" },
        { "韩语", "18" },
        { "荷兰语", "19" },
        { "挪威语", "20" },
        { "波兰语", "21" },
        { "葡萄牙语", "22" },
        { "俄语", "25" },
        { "瑞典语", "29" },
        { "土耳其语", "31" }
    };

    DataTable table = new DataTable();
    table.Columns.Add("ID", typeof(int));
    table.Columns.Add("LanguageName", typeof(string));
    table.Columns.Add("LanguageLCID", typeof(string));

    for (int row = 0; row < languages.GetLength(0); row++)
    {
        table.Rows.Add(row + 1, languages[row, 0], languages[row, 1]);
    }

    return table;
}
/// <summary>
/// Translates a language code string into the matching MODI language constant.
/// </summary>
/// <param name="sValue">
/// Language code as text, e.g. <c>"2052"</c> or <c>"9"</c> (the values stored in the
/// <c>LanguageLCID</c> column produced by <c>getLanguageList</c>).
/// </param>
/// <returns>
/// The corresponding <see cref="MODI.MiLANGUAGES"/> member; English when the code is
/// null or not one of the listed values.
/// </returns>
private static MODI.MiLANGUAGES GetLanuageType(string sValue)
{
    switch (sValue)
    {
        case "5": return MODI.MiLANGUAGES.miLANG_CZECH;
        case "6": return MODI.MiLANGUAGES.miLANG_DANISH;
        case "7": return MODI.MiLANGUAGES.miLANG_GERMAN;
        case "8": return MODI.MiLANGUAGES.miLANG_GREEK;
        case "9": return MODI.MiLANGUAGES.miLANG_ENGLISH;
        case "10": return MODI.MiLANGUAGES.miLANG_SPANISH;
        case "11": return MODI.MiLANGUAGES.miLANG_FINNISH;
        case "12": return MODI.MiLANGUAGES.miLANG_FRENCH;
        case "14": return MODI.MiLANGUAGES.miLANG_HUNGARIAN;
        case "16": return MODI.MiLANGUAGES.miLANG_ITALIAN;
        case "17": return MODI.MiLANGUAGES.miLANG_JAPANESE;
        case "18": return MODI.MiLANGUAGES.miLANG_KOREAN;
        case "19": return MODI.MiLANGUAGES.miLANG_DUTCH;
        case "20": return MODI.MiLANGUAGES.miLANG_NORWEGIAN;
        case "21": return MODI.MiLANGUAGES.miLANG_POLISH;
        case "22": return MODI.MiLANGUAGES.miLANG_PORTUGUESE;
        case "25": return MODI.MiLANGUAGES.miLANG_RUSSIAN;
        case "29": return MODI.MiLANGUAGES.miLANG_SWEDISH;
        case "31": return MODI.MiLANGUAGES.miLANG_TURKISH;
        case "1028": return MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL;
        case "2052": return MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED;
    }

    // Any other input (including null) falls back to English.
    return MODI.MiLANGUAGES.miLANG_ENGLISH;
}
/// <summary>
/// Runs MODI OCR over an in-memory image and returns the recognized text.
/// </summary>
/// <remarks>
/// The image is copied onto a canvas of at least 1024x768 pixels, written to a temporary
/// BMP file, and that file is handed to MODI. Both temporary files are deleted before the
/// method returns, whether or not OCR succeeded.
/// </remarks>
/// <param name="image">Image to recognize.</param>
/// <param name="language">
/// Language code string understood by <c>GetLanuageType</c> (for example <c>"2052"</c>);
/// unrecognized values are treated as English.
/// </param>
/// <returns>The text of the first page of the OCR result; empty when MODI reports no text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static string ExtractText(this System.Drawing.Image image, string language)
{
    // Validate before creating any temporary file so a bad call leaves nothing behind.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    // GetTempFileName creates an empty placeholder file; the bitmap is stored beside it
    // with a ".bmp" suffix, so both paths have to be cleaned up.
    string tmpFile = Path.GetTempFileName();
    string bmpFile = tmpFile + ".bmp";
    try
    {
        // The canvas is never smaller than 1024x768; the source image is drawn at its own
        // size in the top-left corner.
        // NOTE(review): presumably MODI copes poorly with small images - confirm.
        using (Bitmap canvas = new Bitmap(Math.Max(image.Width, 1024), Math.Max(image.Height, 768)))
        {
            using (Graphics gfx = Graphics.FromImage(canvas))
            {
                gfx.DrawImage(image, new Rectangle(0, 0, image.Width, image.Height));
            }

            canvas.Save(bmpFile, ImageFormat.Bmp);
        }

        MODI.Document doc = new MODI.Document();
        bool opened = false;
        try
        {
            doc.Create(bmpFile);
            opened = true;

            doc.OCR(GetLanuageType(language), true, true);

            MODI.Image page = (MODI.Image)doc.Images[0];
            return page.Layout.Text ?? string.Empty;
        }
        finally
        {
            // Close the document before the temp file is deleted below; an open document
            // can keep the file in use and make the delete fail.
            if (opened)
            {
                doc.Close(false);
            }
        }
    }
    finally
    {
        File.Delete(tmpFile);
        File.Delete(bmpFile);
    }
}
/// <summary>
/// Runs MODI OCR over an image file and returns the recognized text of all its pages.
/// </summary>
/// <param name="fileToOCR">Path of the image file to recognize.</param>
/// <param name="language">
/// Language code string understood by <c>GetLanuageType</c> (for example <c>"2052"</c>);
/// unrecognized values are treated as English.
/// </param>
/// <returns>
/// The text of every page concatenated in page order, or an empty string when the file
/// does not exist.
/// </returns>
public static string getFileToOCR(string fileToOCR, string language)
{
    // File.Exists also returns false for a null or empty path, so this covers bad input.
    if (!File.Exists(fileToOCR))
    {
        return string.Empty;
    }

    StringBuilder text = new StringBuilder();
    MODI.Document md = new MODI.Document();
    bool opened = false;
    try
    {
        md.Create(fileToOCR);
        opened = true;

        md.OCR(GetLanuageType(language), true, true);

        // Read the page count once instead of querying the COM collection every iteration.
        int pageCount = md.Images.Count;
        for (int i = 0; i < pageCount; i++)
        {
            MODI.Image page = (MODI.Image)md.Images[i];
            text.Append(page.Layout.Text);
        }
    }
    finally
    {
        // Always close an opened document, even when OCR or page access throws, so the
        // source file is not left open by MODI.
        if (opened)
        {
            md.Close(false);
        }
    }

    return text.ToString();
}
}
}
|