OpenCV 字符切割与识别（训练分类器）

147次阅读

共计 8102 个字符，预计需要花费 21 分钟才能阅读完成。

//opencv2.4.9 + vs2012 + 64位
#include <windows.h>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

// Converts a wide-character string to a newly allocated, NUL-terminated
// multi-byte string in the system ANSI code page (CP_ACP).
// The result is allocated with new[]; the caller owns it and must release it
// with delete[].
char* WcharToChar(const wchar_t* wp)
{
  const int wideLen = static_cast<int>(wcslen(wp));

  // A call with no output buffer only reports how many bytes are required.
  const int byteCount = WideCharToMultiByte(CP_ACP, 0, wp, wideLen, NULL, 0, NULL, NULL);

  char* const converted = new char[byteCount + 1];
  WideCharToMultiByte(CP_ACP, 0, wp, wideLen, converted, byteCount, NULL, NULL);
  converted[byteCount] = '\0';  // terminate explicitly: the input length excluded the NUL
  return converted;
}

// Converts a multi-byte string in the system ANSI code page (CP_ACP) to a
// newly allocated, NUL-terminated wide-character string.
// The result is allocated with new[]; the caller owns it and must release it
// with delete[].
wchar_t* CharToWchar(const char* c)
{
  const int narrowLen = static_cast<int>(strlen(c));

  // A call with no output buffer only reports how many wide characters are required.
  const int wideCount = MultiByteToWideChar(CP_ACP, 0, c, narrowLen, NULL, 0);

  wchar_t* const converted = new wchar_t[wideCount + 1];
  MultiByteToWideChar(CP_ACP, 0, c, narrowLen, converted, wideCount);
  converted[wideCount] = L'\0';  // terminate explicitly: the input length excluded the NUL
  return converted;
}

// Converts a std::string to a newly allocated wide-character string by
// forwarding its character data to CharToWchar.
// The caller owns the returned buffer and must release it with delete[].
wchar_t* StringToWchar(const string& s)
{
  return CharToWchar(s.c_str());
}

void train(const string perfileReadPath, const string fileform)
{
  const int sample_mun_perclass = 40;//训练字符每类数量
  const int class_mun = 10 + 26;//训练字符类数 0-9 A-Z 除了I、O

  const int image_cols = 8;
  const int image_rows = 16;
  string  fileReadName, fileReadPath;
  char temp[256];

  float trainingData[class_mun*sample_mun_perclass][image_rows*image_cols] = { { 0 } };//每一行一个训练样本
  float labels[class_mun*sample_mun_perclass][class_mun] = { { 0 } };//训练样本标签

  for (int i = 0; i <= class_mun - 1; i++)//不同类
  {
    //读取每个类文件夹下所有图像
    int j = 0;//每一类读取图像个数计数

    if (i <= 9)//0-9
    {
      sprintf(temp, "%d", i);
      //printf("%d\n", i);
    }
    else//A-Z
    {
      sprintf(temp, "%c", i + 55);
      //printf("%c\n", i+55);
    }

    fileReadPath = perfileReadPath + "/" + temp + "/" + fileform;
    cout << "文件夹" << temp << endl;

    HANDLE hFile;
    LPCTSTR lpFileName = StringToWchar(fileReadPath);//指定搜索目录和文件类型，如搜索d盘的音频文件可以是"D:\\*.mp3"
    WIN32_FIND_DATA pNextInfo;  //搜索得到的文件信息将储存在pNextInfo中;
    hFile = FindFirstFile(lpFileName, &pNextInfo);//请注意是 &pNextInfo , 不是 pNextInfo;
    if (hFile == INVALID_HANDLE_VALUE)
    {
      continue;//搜索失败
    }
    //do-while循环读取
    do
    {
      if (pNextInfo.cFileName[0] == '.')//过滤.和..
        continue;
      j++;//读取一张图
      //wcout<<pNextInfo.cFileName<<endl;
      printf("%s\n", WcharToChar(pNextInfo.cFileName));
      //对读入的图片进行处理
      Mat srcImage = imread(perfileReadPath + "/" + temp + "/" + WcharToChar(pNextInfo.cFileName), CV_LOAD_IMAGE_GRAYSCALE);
      Mat resizeImage;
      Mat trainImage;
      Mat result;

      resize(srcImage, resizeImage, Size(image_cols, image_rows), (0, 0), (0, 0), CV_INTER_AREA);//使用象素关系重采样。当图像缩小时候，该方法可以避免波纹出现
      threshold(resizeImage, trainImage, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

      for (int k = 0; k<image_rows*image_cols; ++k)
      {
        trainingData[i*sample_mun_perclass + (j - 1)][k] = (float)trainImage.data[k];
        //trainingData[i*sample_mun_perclass+(j-1)][k] = (float)trainImage.at<unsigned char>((int)k/8,(int)k%8);//(float)train_image.data[k];
        //cout<<trainingData[i*sample_mun_perclass+(j-1)][k] <<" "<< (float)trainImage.at<unsigned char>(k/8,k%8)<<endl;
      }

    } while (FindNextFile(hFile, &pNextInfo) && j<sample_mun_perclass);//如果设置读入的图片数量，则以设置的为准，如果图片不够，则读取文件夹下所有图片

  }

  // Set up training data Mat
  Mat trainingDataMat(class_mun*sample_mun_perclass, image_rows*image_cols, CV_32FC1, trainingData);
  cout << "trainingDataMat——OK！" << endl;

  // Set up label data 
  for (int i = 0; i <= class_mun - 1; ++i)
  {
    for (int j = 0; j <= sample_mun_perclass - 1; ++j)
    {
      for (int k = 0; k < class_mun; ++k)
      {
        if (k == i)
          labels[i*sample_mun_perclass + j][k] = 1;
        else
          labels[i*sample_mun_perclass + j][k] = 0;
      }
    }
  }
  Mat labelsMat(class_mun*sample_mun_perclass, class_mun, CV_32FC1, labels);
  cout << "labelsMat:" << endl;
  ofstream outfile("out.txt");
  outfile << labelsMat;
  //cout<<labelsMat<<endl;
  cout << "labelsMat——OK！" << endl;

  //训练代码

  cout << "training start...." << endl;
  CvANN_MLP bp;
  // Set up BPNetwork's parameters
  CvANN_MLP_TrainParams params;
  params.train_method = CvANN_MLP_TrainParams::BACKPROP;
  params.bp_dw_scale = 0.001;
  params.bp_moment_scale = 0.1;
  params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.0001);  //设置结束条件

  //Setup the BPNetwork
  Mat layerSizes = (Mat_<int>(1, 5) << image_rows*image_cols, 128, 128, 128, class_mun);
  bp.create(layerSizes, CvANN_MLP::SIGMOID_SYM, 1.0, 1.0);//CvANN_MLP::SIGMOID_SYM
  //CvANN_MLP::GAUSSIAN
  //CvANN_MLP::IDENTITY
  cout << "正在训练..." << endl;
  bp.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
  bp.save("bpcharModel.xml"); //save classifier
  cout << "训练结束,生成bpModel1.xml文件" << endl;
}

// Loads a trained CvANN_MLP model and classifies a single character image.
//
// xmlfile: path of the model file written by train().
// imgfile: path of the character image to classify.
//
// The image is preprocessed the same way as the training samples (grayscale,
// resized to image_cols x image_rows, Otsu binarisation, row-major flatten).
// The class with the largest network response is printed together with that
// response, then the input image is shown until a key is pressed.
void test(string xmlfile,string imgfile)
{
  const int image_cols = 8;
  const int image_rows = 16;

  CvANN_MLP bp;
  // Pass the path directly; copying it into a fixed-size buffer could
  // overflow on long paths.
  bp.load(xmlfile.c_str());

  cout << "测试：" << endl;
  Mat test_image = imread(imgfile, CV_LOAD_IMAGE_GRAYSCALE);
  if (test_image.empty())
  {
    // resize() would assert on an empty image.
    cout << "cannot read image: " << imgfile << endl;
    return;
  }

  Mat test_temp;
  // INTER_AREA resamples using pixel-area relation, which avoids moire
  // artefacts when shrinking.
  resize(test_image, test_temp, Size(image_cols, image_rows), 0, 0, CV_INTER_AREA);
  threshold(test_temp, test_temp, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

  Mat_<float> sampleMat(1, image_rows*image_cols);
  for (int i = 0; i < image_rows*image_cols; ++i)
  {
    sampleMat(0, i) = static_cast<float>(test_temp.at<uchar>(i / image_cols, i % image_cols));
  }

  Mat responseMat;
  bp.predict(sampleMat, responseMat);
  Point maxLoc;
  double maxVal = 0;
  minMaxLoc(responseMat, NULL, &maxVal, NULL, &maxLoc);

  // Class index -> character: 0-9 are digits, 10-35 map to 'A'-'Z' (10 + 55 == 'A').
  const char label = static_cast<char>(maxLoc.x <= 9 ? '0' + maxLoc.x : maxLoc.x + 55);

  cout << "识别结果：" << label << "\t相似度:" << maxVal * 100 << "%" << endl;
  imshow("test_image", test_image);
  waitKey(0);
}

int main()
{
  train("C:\\Users\\GuoJawee\\Desktop\\数据集_\\训练集1", "*.jpg");
  system("pause");
  //test("bpcharModel.xml","1.bmp");
}

#include <iostream>

#include <stdlib.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <map>
#include <time.h>

using namespace cv;
using namespace std;


// Side length of the square structuring element used for dilation in
// charRecognition (the kernel is Size(dilateValue, dilateValue)).
// NOTE(review): the original comment mentioned size (3,3), but the value is 1 — confirm which is intended.
int dilateValue = 1;
// Contour filters used in charRecognition: minimum bounding-box area and the
// accepted range of bounding-box width / height.
double min_area = 200;
double min_width_div_height = 0.08;
double max_width_div_height = 2.0;


// Neural-network classifier shared by loadXML (which loads it) and
// charClassify (which queries it).
CvANN_MLP bp;


// Loads the classification model from xmlfile into the global network `bp`.
void loadXML(string xmlfile)
{
  // Pass the path directly; copying it into a fixed-size buffer could
  // overflow on long paths.
  bp.load(xmlfile.c_str());
}

// Classifies a single character image with the global network `bp`.
//
// roi: image region containing one character; either single-channel or a
//      BGR/BGRA colour image as produced by imread.
// Returns '0'-'9' for class indices 0-9 and 'A'-'Z' for indices 10-35.
//
// Preprocessing mirrors the training pipeline: resize to
// image_cols x image_rows, convert to grayscale, Otsu binarisation, row-major
// flatten. The caller's pixel data is never written to.
char charClassify(Mat roi)
{
  const int image_cols = 8;
  const int image_rows = 16;

  // INTER_AREA resamples using pixel-area relation, which avoids moire
  // artefacts when shrinking.
  Mat resized;
  resize(roi, resized, Size(image_cols, image_rows), 0, 0, CV_INTER_AREA);

  // imread delivers BGR channel order, so BGR2GRAY gives the correct
  // luminance weights. cvtColor asserts on single-channel input, so an
  // already-gray image is used as is.
  Mat gray;
  if (resized.channels() == 1)
    gray = resized;
  else
    cvtColor(resized, gray, CV_BGR2GRAY);

  Mat binary;
  threshold(gray, binary, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);

  Mat_<float> sampleMat(1, image_rows*image_cols);
  for (int i = 0; i < image_rows*image_cols; ++i)
  {
    sampleMat(0, i) = static_cast<float>(binary.at<uchar>(i / image_cols, i % image_cols));
  }

  Mat responseMat;
  bp.predict(sampleMat, responseMat);
  Point maxLoc;
  double maxVal = 0;
  minMaxLoc(responseMat, NULL, &maxVal, NULL, &maxLoc);

  // Class index -> character: 0-9 are digits, 10-35 map to 'A'-'Z' (10 + 55 == 'A').
  return static_cast<char>(maxLoc.x <= 9 ? '0' + maxLoc.x : maxLoc.x + 55);
}


// Segments the characters in an image and recognises each one.
//
// imgfile:  path of the image to process.
// showflag: when true, draws the bounding box and recognised character for
//           every accepted contour, shows the annotated image in a window
//           and writes it next to the input with a "res" file-name prefix.
// Returns the recognised characters ordered by the left edge of their
// bounding boxes (left to right); an empty string if the image cannot be read.
//
// Contours are filtered with the globals min_area, min_width_div_height and
// max_width_div_height.
string charRecognition(string imgfile, bool showflag)
{
  Mat srcImg = imread(imgfile);
  if (srcImg.empty())
  {
    // cvtColor() would assert on an empty image.
    cout << "cannot read image: " << imgfile << endl;
    return "";
  }

  Mat binaryImg;
  cvtColor(srcImg, binaryImg, CV_BGR2GRAY);
  threshold(binaryImg, binaryImg, 0, 255, CV_THRESH_OTSU); // Otsu binarisation
  binaryImg = 255 - binaryImg;                             // invert colours
  // Rectangular structuring element; ellipse and cross shapes also exist.
  Mat element = getStructuringElement(MORPH_RECT, Size(dilateValue, dilateValue));
  dilate(binaryImg, binaryImg, element);

  Mat dstImg = srcImg.clone(); // annotated copy; srcImg stays untouched
  vector<vector<Point>> contours;
  vector<Vec4i> hierarcy;
  findContours(binaryImg, contours, hierarcy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

  // Left x coordinate -> character. A multimap keeps both characters when
  // two boxes share the same left edge.
  multimap<int, char> charsByX;

  for (size_t i = 0; i < contours.size(); ++i)
  {
    const Rect bound = boundingRect(Mat(contours[i]));

    const double areaValue = (double)bound.width * (double)bound.height;
    const double width_div_height = (double)bound.width / (double)bound.height;

    if (areaValue < min_area || min_width_div_height > width_div_height || width_div_height > max_width_div_height)
    {
      continue;
    }

    // Crop from the untouched source so annotations drawn for earlier
    // contours cannot leak into later character images.
    Mat roi = srcImg(bound);
    const char ch = charClassify(roi);
    charsByX.insert(make_pair(bound.x, ch));

    if (showflag)
    {
      rectangle(dstImg, bound.tl(), bound.br(), Scalar(0, 0, 255), 2, 8);
      putText(dstImg, string(1, ch), bound.tl(), FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 1, 8, 0);
    }
  }

  string resstr;
  for (const auto& node : charsByX)
  {
    resstr += node.second;
  }

  if (showflag)
  {
    cv::imshow("结果图", dstImg);

    // Put the "res" prefix on the file name, not on the whole path, so that
    // inputs given with a directory still yield a valid output path.
    const string::size_type sep = imgfile.find_last_of("/\\");
    const string outPath = (sep == string::npos)
      ? "res" + imgfile
      : imgfile.substr(0, sep + 1) + "res" + imgfile.substr(sep + 1);
    cv::imwrite(outPath, dstImg);
  }
  return resstr;
}


int main()
{
  loadXML("./bpcharModel.xml");  //加载模型

  clock_t start = clock();

  string resstr = charRecognition("4.png", false);   //图像路径  是否显示
  cout << "识别字符结果 = " << resstr << endl << endl;

  clock_t ends = clock();
  cout << "程序运行时间 = " << (double)(ends - start) / CLOCKS_PER_SEC << endl;

  waitKey(0);
  system("pause");
  return 0;
}

其他参考：OpenCV 文字检测与识别模块_皆成旧梦的博客-CSDN博客_opencv文字识别

使用 OpenCV 进行轮廓检测、字符提取、简单的直方图字符识别！_17岁boy想当攻城狮的博客-CSDN博客_opencv字符检测

正文完

发表至： C/C++

2022-08-14