Poker-AI.org

Re: screen scraping

2019-07-25T13:02:20+00:00

nefton wrote:

Some other resize:

}[/code]

Thanks for sharing.

Statistics: Posted by kramods — Thu Jul 25, 2019 1:02 pm

Re: screen scraping

2019-07-17T18:02:04+00:00

Some other resize:

Code:

// Nearest-neighbour resize of a single-channel 8-bit image.
//
// source_1b: input image, read through at<uint8_t> (expected to be CV_8UC1).
// dest_1b:   pre-allocated output image; its current size is the target size and
//            every pixel of it is overwritten.
void ResizeFast(cv::Mat* source_1b, cv::Mat* dest_1b) {
   // Nothing to sample from or nothing to fill; also avoids dividing by zero below.
   if (source_1b->empty() || dest_1b->empty()) return;

   const double scale_x = static_cast<double>(source_1b->cols) / dest_1b->cols;
   const double scale_y = static_cast<double>(source_1b->rows) / dest_1b->rows;
   const int last_source_row = source_1b->rows - 1;
   const int last_source_col = source_1b->cols - 1;

   for (int row = 0; row < dest_1b->rows; row++) {
      // lround() may round up to source_1b->rows (upscaling 10 -> 20 rows gives
      // lround(9.5) == 10), so clamp to the last valid index.
      int source_row = lround(scale_y * row);
      if (source_row > last_source_row) source_row = last_source_row;

      for (int col = 0; col < dest_1b->cols; col++) {
         int source_col = lround(scale_x * col);
         if (source_col > last_source_col) source_col = last_source_col;

         dest_1b->at<uint8_t>(row, col) = source_1b->at<uint8_t>(source_row, source_col);
      }
   }
   return;
}


cv::Mat FilterTwiceLinear(cv::Mat* source_1b) {

   cv::Mat out2x_1b(source_1b->rows * 2, source_1b->cols * 2, CV_8UC1);
   out2x_1b.setTo(0);
   
   for (int row = 1; row < source_1b->rows - 1; row++) {
      for (int col = 1; col < source_1b->cols - 1; col++) {
         uint8_t v0 = source_1b->at(row + 0, col + 0);
         uint8_t v1 = source_1b->at(row - 1, col - 1);
         uint8_t v2 = source_1b->at(row - 1, col + 0);
         uint8_t v3 = source_1b->at(row - 1, col + 1);
         uint8_t v4 = source_1b->at(row + 0, col + 1);
         uint8_t v5 = source_1b->at(row + 1, col + 1);
         uint8_t v6 = source_1b->at(row + 1, col + 0);
         uint8_t v7 = source_1b->at(row + 1, col - 1);
         uint8_t v8 = source_1b->at(row + 0, col - 1);

         double v_lu = 0.446*v0 + 0.150*v1 + 0.202*v2 + 0.202*v8;
         double v_ru = 0.446*v0 + 0.150*v3 + 0.202*v2 + 0.202*v4;
         double v_rd = 0.446*v0 + 0.150*v5 + 0.202*v4 + 0.202*v6;
         double v_ld = 0.446*v0 + 0.150*v7 + 0.202*v8 + 0.202*v6;

         out2x_1b.at(row * 2 + 0, col * 2 + 0) = std::lround(v_lu);
         out2x_1b.at(row * 2 + 0, col * 2 + 1) = std::lround(v_ru);
         out2x_1b.at(row * 2 + 1, col * 2 + 0) = std::lround(v_ld);
         out2x_1b.at(row * 2 + 1, col * 2 + 1) = std::lround(v_rd);
      }
   }

   return out2x_1b;
}

cv::Mat FilterTwiceSquare(cv::Mat* source_1b) {

   cv::Mat out2x_1b(source_1b->rows * 2, source_1b->cols * 2, CV_8UC1);
   out2x_1b.setTo(0);

   for (int row = 1; row < source_1b->rows - 1; row++) {
      for (int col = 1; col < source_1b->cols - 1; col++) {
         uint8_t v0 = source_1b->at(row + 0, col + 0);
         uint8_t v1 = source_1b->at(row - 1, col - 1);
         uint8_t v2 = source_1b->at(row - 1, col + 0);
         uint8_t v3 = source_1b->at(row - 1, col + 1);
         uint8_t v4 = source_1b->at(row + 0, col + 1);
         uint8_t v5 = source_1b->at(row + 1, col + 1);
         uint8_t v6 = source_1b->at(row + 1, col + 0);
         uint8_t v7 = source_1b->at(row + 1, col - 1);
         uint8_t v8 = source_1b->at(row + 0, col - 1);

         double v_lu = 0.655*v0 + 0.075*v1 + 0.135*v2 + 0.135*v8;
         double v_ru = 0.655*v0 + 0.075*v3 + 0.135*v2 + 0.135*v4;
         double v_rd = 0.655*v0 + 0.075*v5 + 0.135*v4 + 0.135*v6;
         double v_ld = 0.655*v0 + 0.075*v7 + 0.135*v8 + 0.135*v6;

         out2x_1b.at(row * 2 + 0, col * 2 + 0) = std::lround(v_lu);
         out2x_1b.at(row * 2 + 0, col * 2 + 1) = std::lround(v_ru);
         out2x_1b.at(row * 2 + 1, col * 2 + 0) = std::lround(v_ld);
         out2x_1b.at(row * 2 + 1, col * 2 + 1) = std::lround(v_rd);
      }
   }

   return out2x_1b;
}

cv::Mat FilterTrippleSquare(cv::Mat* source_1b) {

   cv::Mat out2x_1b(source_1b->rows * 3, source_1b->cols * 3, CV_8UC1);
   out2x_1b.setTo(0);

   for (int row = 1; row < source_1b->rows - 1; row++) {
      for (int col = 1; col < source_1b->cols - 1; col++) {
         uint8_t s0 = source_1b->at(row + 0, col + 0);
         uint8_t s1 = source_1b->at(row - 1, col - 1);
         uint8_t s2 = source_1b->at(row - 1, col + 0);
         uint8_t s3 = source_1b->at(row - 1, col + 1);
         uint8_t s4 = source_1b->at(row + 0, col + 1);
         uint8_t s5 = source_1b->at(row + 1, col + 1);
         uint8_t s6 = source_1b->at(row + 1, col + 0);
         uint8_t s7 = source_1b->at(row + 1, col - 1);
         uint8_t s8 = source_1b->at(row + 0, col - 1);

         double v1 = 0.495*s0 + 0.124*s1 + 0.190*s2 + 0.190*s8;
         double v3 = 0.495*s0 + 0.124*s3 + 0.190*s2 + 0.190*s4;
         double v5 = 0.495*s0 + 0.124*s5 + 0.190*s4 + 0.190*s6;
         double v7 = 0.495*s0 + 0.124*s7 + 0.190*s6 + 0.190*s8;

         //double v2 = 0.624*s0 + 0.156*s2 + 0.062*s4 + 0.062*s8 + 0.048*s1 + 0.048*s3;
         //double v4 = 0.624*s0 + 0.156*s4 + 0.062*s2 + 0.062*s6 + 0.048*s3 + 0.048*s5;
         //double v6 = 0.624*s0 + 0.156*s6 + 0.062*s4 + 0.062*s8 + 0.048*s5 + 0.048*s7;
         //double v8 = 0.624*s0 + 0.156*s8 + 0.062*s2 + 0.062*s6 + 0.048*s1 + 0.048*s7;
         //double v0 = 0.125*(v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8);
         double v2 = (v1 + v3) / 2;
         double v4 = (v3 + v5) / 2;
         double v6 = (v5 + v7) / 2;
         double v8 = (v7 + v1) / 2;
         double v0 = (v1 + v3 + v5 + v7) / 4;

         out2x_1b.at(row * 3 + 1, col * 3 + 1) = std::lround(v0);
         out2x_1b.at(row * 3 + 0, col * 3 + 0) = std::lround(v1);
         out2x_1b.at(row * 3 + 0, col * 3 + 1) = std::lround(v2);
         out2x_1b.at(row * 3 + 0, col * 3 + 2) = std::lround(v3);
         out2x_1b.at(row * 3 + 1, col * 3 + 2) = std::lround(v4);
         out2x_1b.at(row * 3 + 2, col * 3 + 2) = std::lround(v5);
         out2x_1b.at(row * 3 + 2, col * 3 + 1) = std::lround(v6);
         out2x_1b.at(row * 3 + 2, col * 3 + 0) = std::lround(v7);
         out2x_1b.at(row * 3 + 1, col * 3 + 0) = std::lround(v8);
      }
   }

   return out2x_1b;
}

Statistics: Posted by nefton — Wed Jul 17, 2019 6:02 pm

Re: screen scraping

2019-07-17T17:36:30+00:00

Some more code.
Very fast blur code:

Code:

void Blur3px1b(cv::Mat* img_1b) {
   for (int row = 1; row < img_1b->rows - 1; row++) {
      for (int col = 1; col < img_1b->cols - 1; col++) {
         uint8_t b0 = img_1b->at(row + 0, col + 0);
         uint8_t b1 = img_1b->at(row - 1, col - 1);
         uint8_t b2 = img_1b->at(row - 1, col + 0);
         uint8_t b3 = img_1b->at(row - 1, col + 1);
         uint8_t b4 = img_1b->at(row + 0, col - 1);
         uint8_t b5 = img_1b->at(row + 0, col + 1);
         uint8_t b6 = img_1b->at(row + 1, col - 1);
         uint8_t b7 = img_1b->at(row + 1, col + 0);
         uint8_t b8 = img_1b->at(row + 1, col + 1);
         int result = b0 / 2 + b1 / 16 + b2 / 16 + b3 / 16 + b4 / 16 + b5 / 16 + b6 / 16 + b7 / 16 + b8 / 16;
         if (result > 255) result = 255;
         img_1b->at(row, col) = result;
      }
   }
   return;
}

Best resize function ever:

Code:

void ResizeSuper(cv::Mat* source_1b, cv::Mat* dest_1b) {

   dest_1b->setTo(0);

   //define size of points 
   double source_x_point_size = 1.0 / source_1b->cols;
   double source_y_point_size = 1.0 / source_1b->rows;
   double dest_x_point_size = 1.0 / dest_1b->cols;
   double dest_y_point_size = 1.0 / dest_1b->rows;

   //foreach source row
   for (int source_row = 0; source_row < source_1b->rows; source_row++) {
      //Надо определить все ряды наначения (1 или много) куда входит этот ряд хотябы частично
      //Определяем начало и конец этого исходного ряда
      double source_start_y = source_y_point_size * source_row;
      double source_end_y = source_y_point_size * (source_row + 1);
      //Определяем первый ряд назначения который хотя б частично перекрывает наш ряд исходный
      int dest_start_row = source_start_y / dest_y_point_size;
      //Определяем последний ряд назначения который хотя б частично перекрывает наш ряд исходный
      int dest_end_row = source_end_y / dest_y_point_size;
      //std::cout << "Source_row: " << source_row << " \tDest: " << dest_start_row << " - " << dest_end_row << "\n";
      //Для всех рядов назначения определяем степень вхождения в них исходного ряда
      for (int dest_row = dest_start_row; dest_row <= dest_end_row; dest_row++) {
         //Определяем начало и конец ряда назначения
         double dest_start_y = dest_y_point_size * dest_row;
         double dest_end_y = dest_y_point_size * (dest_row + 1);
         //Определяем начало перекрывающегося промежутка (share_space_y_min)
         double share_space_y_min = dest_start_y;
         if (source_start_y > dest_start_y) share_space_y_min = source_start_y;
         double share_space_y_max = dest_end_y;
         if (source_end_y < dest_end_y) share_space_y_max = source_end_y;
         //Определяем коэффициент фхождения в них исходного ряда
         double dest_y_coeff = (share_space_y_max - share_space_y_min) / dest_y_point_size;
         //std::cout << "\tDest row: " << dest_row << " \tKoeff_y: " << dest_y_coeff << "\n";

         //======================================================================================
         //Для каждой исходной колонки
         for (int source_col = 0; source_col < source_1b->cols; source_col++) {
            //Надо определить все колонки наначения (1 или много) куда входит эта колонка хотябы частично
            //Определяем начало и конец этой исходной колонки
            double source_start_x = source_x_point_size * source_col;
            double source_end_x = source_x_point_size * (source_col + 1);
            //Определяем первую колонку назначения которая хотя б частично перекрывает нашу исходную колонку
            int dest_start_col = source_start_x / dest_x_point_size;
            //Определяем последнюю колонку назначения которая хотя б частично перекрывает нашу исходную колонку
            int dest_end_col = source_end_x / dest_x_point_size;
            //std::cout << "Source_col: " << source_col << " \tDest: " << dest_start_col << " - " << dest_end_col << "\n";
            //Для всех колонок назначения определяем степень вхождения в них исходной колонки
            for (int dest_col = dest_start_col; dest_col <= dest_end_col; dest_col++) {
               //Определяем начало и конец колонки назначения
               double dest_start_x = dest_x_point_size * dest_col;
               double dest_end_x = dest_x_point_size * (dest_col + 1);
               //Определяем начало перекрывающегося промежутка (share_space_x_min)
               double share_space_x_min = dest_start_x;
               if (source_start_x > dest_start_x) share_space_x_min = source_start_x;
               double share_space_x_max = dest_end_x;
               if (source_end_x < dest_end_x) share_space_x_max = source_end_x;
               //Определяем коэффициент фхождения в них исходной колонки
               double dest_x_coeff = (share_space_x_max - share_space_x_min) / dest_x_point_size;
               //std::cout << "\tDest col: " << dest_col << " \tKoeff_x: " << dest_x_coeff << "\n";

               //Прибавляем нашу исходную точку к соответствующей точке назначения с коэффициентами площади
               if (dest_y_coeff * dest_x_coeff > 0.001) {
                  dest_1b->at(dest_row, dest_col) += dest_y_coeff * dest_x_coeff * source_1b->at(source_row, source_col);
               }
            }
         }
      }
   }

   return;
}

Statistics: Posted by nefton — Wed Jul 17, 2019 5:36 pm

Re: screen scraping

2019-07-17T17:31:16+00:00

Hmm, it seems there are some problems with my code.
Here is some more code:

Code:

// Converts one 8-bit RGB colour to HSV, with all three outputs in the range 0..255.
//
// r, g, b: input colour components.
// h: hue on a 0..255 circle - red is 0, green is 85, blue is 171, and each primary
//    spans +/-43 around its anchor. Hues just below red wrap around (e.g. magenta
//    255,0,255 gives 213). Set to 0 for black and for greys.
// s: saturation, 255 * (max - min) / max. Set to 0 for black.
// v: value, the largest of r, g, b.
void RGB2HSV(uint8_t r, uint8_t g, uint8_t b, uint8_t* h, uint8_t* s, uint8_t* v){
   const uint8_t rgbMin = r < g ? (r < b ? r : b) : (g < b ? g : b);
   const uint8_t rgbMax = r > g ? (r > b ? r : b) : (g > b ? g : b);
   *v = rgbMax;
   if (*v == 0) { *h = 0; *s = 0; return; }
   *s = 255 * long(rgbMax - rgbMin) / (*v);
   if (*s == 0) { *h = 0; return; }

   // Non-zero here because the saturation is non-zero.
   const int delta = rgbMax - rgbMin;
   // The offset from the anchor may be negative; converting to uint8_t wraps it
   // modulo 256, which is exactly the wrap-around wanted on the hue circle.
   if (rgbMax == r) *h = static_cast<uint8_t>(0 + 43 * (g - b) / delta);
   else if (rgbMax == g) *h = static_cast<uint8_t>(85 + 43 * (b - r) / delta);
   else *h = static_cast<uint8_t>(171 + 43 * (r - g) / delta);
   return;
}

void ShowLargeCallBackFunc(int event, int x, int y, int flags, void* userdata)
{

   if (flags == cv::EVENT_FLAG_RBUTTON){
      cv::Mat* scr = (cv::Mat*)userdata;
      int type = scr->type();
      int channels = scr->channels();
      if (channels == 1){
         std::cout << "Pos(" << x / 10 << ", " << y / 10 << ")  Val( " << (int)scr->at(y, x) << " )" << std::endl;
      }
      if (channels == 4){
         int r = scr->at(y, x)[2];
         int g = scr->at(y, x)[1];
         int b = scr->at(y, x)[0];
         unsigned __int8 h, s, v;
         RGB2HSV(r, g, b, &h, &s, &v);
         std::cout << "Pos( " << x / 10 << ", " << y / 10 << " )  RGB( " << r << ", " << g << ", " << b << " )";
         std::cout << " HSV( " << (int)h << ", " << (int)s << ", " << (int)v << " )" << std::endl;
      }
      if (channels == 3){
         int r = scr->at(y, x)[2];
         int g = scr->at(y, x)[1];
         int b = scr->at(y, x)[0];
         unsigned __int8 h, s, v;
         RGB2HSV(r, g, b, &h, &s, &v);
         std::cout << "Pos( " << x / 10 << ", " << y / 10 << " )  RGB( " << r << ", " << g << ", " << b << " )";
         std::cout << " HSV( " << (int)h << ", " << (int)s << ", " << (int)v << " )" << std::endl;
      }
   }
   return;
}



void ShowLarge10x(cv::String winname, cv::Mat scr, bool wait_to_cklick){
   int type = scr.type();
   int channels = scr.channels();
   cv::Mat Img(scr.rows * 10, scr.cols * 10, type);
   if (channels == 1){
      for (int row = 0; row < Img.rows; row++){
         for (int col = 0; col < Img.cols; col++){
            Img.at(row, col) = scr.at(row / 10, col / 10);
         }
      }
   }
   if (channels == 3){
      for (int row = 0; row < Img.rows; row++){
         for (int col = 0; col < Img.cols; col++){
            Img.at(row, col) = scr.at(row / 10, col / 10);
         }
      }
   }
   if (channels == 4){
      for (int row = 0; row < Img.rows; row++){
         for (int col = 0; col < Img.cols; col++){
            Img.at(row, col) = scr.at(row / 10, col / 10);
         }
      }
   }
   cv::imshow(winname, Img);
   cv::setMouseCallback(winname, ShowLargeCallBackFunc, &Img);
   if (wait_to_cklick) {
      cv::waitKey(0);
      cv::destroyWindow(winname);
   }
   return;
}

ShowLarge10x shows an image (1, 3 or 4 bytes per pixel).
Also, if you click the right mouse button on any pixel, it prints the x,y position, RGB and HSV of that pixel.
Very helpful, everybody should have this!

Statistics: Posted by nefton — Wed Jul 17, 2019 5:31 pm

Re: screen scraping

2019-07-17T02:14:34+00:00

Afaik uint8_t is just a typedef for unsigned char, so the types are exactly the same.

Statistics: Posted by HontoNiBaka — Wed Jul 17, 2019 2:14 am

Re: screen scraping

2019-07-16T20:05:15+00:00

nefton wrote:

cant add more than 3 attachment so filter 3 and the code here

Code:

#include <cstdlib>
#include <iostream>
#include "opencv2/opencv.hpp"
#include "common_ocr_functions.h"

// Demo of the three-step filter chain on one example image:
//   filter1 - HSV value channel of the source, doubled in size;
//   filter2 - Gaussian blur of filter1;
//   filter3 - filter2 with every pixel not above 100 set to 0.
// The result is shown enlarged with ShowLarge10x.
int main() {

   cv::Mat source = cv::imread("D:/PROJECTS/opencv_sample/example.png");
   // imread() reports failure by returning an empty image rather than throwing.
   if (source.empty()) {
      std::cerr << "Cannot read D:/PROJECTS/opencv_sample/example.png\n";
      return 1;
   }

   //cv::imshow("source", source);
   //cv::waitKey();

   //ShowLarge10x("source", source, true);

   cv::Mat filter1(source.rows * 2, source.cols * 2, CV_8UC1);

   for (int row = 0; row < source.rows; row++) {
      for (int col = 0; col < source.cols; col++) {
         // imread() with default flags yields a 3-channel BGR image.
         const cv::Vec3b point = source.at<cv::Vec3b>(row, col);
         const uint8_t b = point[0];
         const uint8_t g = point[1];
         const uint8_t r = point[2];
         uint8_t h, s, v;
         RGB2HSV(r, g, b, &h, &s, &v);
         // Write the value channel into the 2x2 block belonging to this source pixel.
         filter1.at<uint8_t>(row * 2 + 0, col * 2 + 0) = v;
         filter1.at<uint8_t>(row * 2 + 1, col * 2 + 0) = v;
         filter1.at<uint8_t>(row * 2 + 0, col * 2 + 1) = v;
         filter1.at<uint8_t>(row * 2 + 1, col * 2 + 1) = v;
      }
   }

   //ShowLarge10x("filter1", filter1, true);

   cv::Mat filter2(source.rows * 2, source.cols * 2, CV_8UC1);

   // GaussianBlur requires an odd kernel size; the former even size 4x4 is rejected,
   // so the next odd size is used.
   cv::GaussianBlur(filter1, filter2, cv::Size(5, 5), 0.0);

   //ShowLarge10x("filter2", filter2, true);

   cv::Mat filter3(source.rows * 2, source.cols * 2, CV_8UC1);

   // Simple threshold: keep values above 100, zero everything else.
   for (int row = 0; row < filter2.rows; row++) {
      for (int col = 0; col < filter2.cols; col++) {
         const uint8_t v = filter2.at<uint8_t>(row, col);
         filter3.at<uint8_t>(row, col) = (v > 100) ? v : 0;
      }
   }

   ShowLarge10x("filter3", filter3, true);

   std::cout << "\n\n";
   std::system("pause");
   return 0;
}

Can someone help me with this code?

I'm pretty new to all of this and have already been trying for a few hours to get this code working, as the function RGB2HSV is missing here. I have found some standard rgb2hsv implementations out there, but they use unsigned char instead of uint8.

I'm not exactly sure why; it seems we prefer uint8 over unsigned char when we plan to store small numbers rather than characters.

Statistics: Posted by sadnok — Tue Jul 16, 2019 8:05 pm

Re: screen scraping

2018-08-28T09:23:55+00:00

Hi!

Sorry i coulnt post sooner, but I had to help a friend with a construction job. Today is the first time i am working on the bot again.

I understand that greyscale is better. But i already have put a lot of work using binary. And i still have 100% accuracy. So i am just going on
using binary. If i run into problems, i will switch to grayscale.

The 15% (which was a random number) i dont use when recognizing numbers. I still use the best matching mask for that. But when i am looking for..lets say..a fold button...i look for a pattern and accept a certain error rate. As of now, i have again 100% accuracy recognizing buttons, etc. Speed (as you pointed out) is not the best. lol. But i am trying to get this scraping part done as soon as possible so I can test my AI bit..because if AI fails..everything fails.

Cheers!

Statistics: Posted by Jannus — Tue Aug 28, 2018 9:23 am

Re: screen scraping

2018-08-15T22:13:11+00:00

Jannus wrote:

(so 0.15 * src1-width * src1-height)

No! You are still measuring the difference in a binary way: yes or no. That is the wrong way. If your image is greyscale, for example, each point has a value from 0 to 255.
So the average difference is Sum(ABS(Point(image) - Point(template))) / template.rows / template.cols;
For an RGB image the difference is the average over the 3 channels (but you do not have to use that; I used it once and want to rewrite that stupid code).
Then you can compare your average difference with any number, 0.15 or something else.
0.15 is very BAD! If comparing your image with the template gives an average difference of 0.15, you are using very bad filters! 0.05 is a good value.
For example, you have a "6" image. With your poor filters it compares with "6" = 0.15, with "0" = 0.18, with "8" = 0.18.
That situation is the "edge of working". With GOOD filters it should be: with "6" = 0.05, with "0" = 0.30, with "8" = 0.30.
That is GOOD recognition.

Statistics: Posted by nefton — Wed Aug 15, 2018 10:13 pm

Re: screen scraping

2018-08-15T20:41:47+00:00

nefton wrote:

In 1st phase we search few KEY points.

so just store some (say 4) RGB values for 4 x,y coordinates point. Then look for those 4 points, When you found it, compare the other x,y of the image to see if you get more matches then then the 4 you already had, and above threshold -> disco?

What is a good threshold? I now use 15% error of the search image. (so 0.15 * src1-width * src1-height)

Statistics: Posted by Jannus — Wed Aug 15, 2018 8:41 pm

Re: screen scraping

2018-08-15T20:42:26+00:00

nefton wrote:

Your code is very ... slow. It is also very ... strange.
Checking the width of the image... Why?

I don't understand the question completely. But are you referring to these lines?

Code:

// Leave the loop once the target, placed at x, would extend past endx.
if ( (x +target.getWidth()) > endx  ) {
         
            break;
 }

These are because the search image cant be found anymore, because we already looked through the source image up to a point that the search image does not fit anymore in the remaing part of the source image. But im guessing this is not what you meant.

Statistics: Posted by Jannus — Wed Aug 15, 2018 8:31 pm

Re: screen scraping

2018-08-14T21:59:49+00:00

mlatinjo wrote:

Jannus wrote:

thanks for your post. incredibly helpful

But what i was wondering..is it possible to completely remove the OCR part. For example the stack size scraping. Just take screenshots of all the
10 digits. Store these as single image files (one for each digit). Then look for those images in the right places of the screenshot. Then reconstruct the stacksize based on the digits that are there. If this works (and is fast enough) you will have 100% accuracy and no need for OCR what so ever. I am going to try this approach first..if it doesnt work I will try Tesseract or OpenCV.

Will let you know how it went

On some sites you will see only 10 digits, but on most of sites today you have many variants of how one digit looks like depending on the position. It is likely the effect of font smoothing / scaling. Openholdem is very good at it, by applying different filters, and big advantage is that you can find already done TM files from other guys who already done it.

I completely agree. You can sometimes make your life easier though, like you can change the table background, or sometime smoothing will be disabled when you make the table very small. There are things like that, but I also assume it makes you easier to detect.

Statistics: Posted by HontoNiBaka — Tue Aug 14, 2018 9:59 pm

Re: screen scraping

2018-08-14T20:57:29+00:00

Jannus wrote:

Statistics: Posted by mlatinjo — Tue Aug 14, 2018 8:57 pm

Re: screen scraping

2018-08-13T08:32:58+00:00

Your code is very ... slow. It is also very ... strange.
Checking the width of the image... Why?
Here is how your code can be done in 1 function:

Code:

// Searches for src1 inside src2: tries every position at which src1 fits completely
// and reports those where fewer than 20 pixels differ.
// NOTE(review): at<uint8_t> assumes both images are single-channel 8-bit - confirm.
for (int start_row = 0; start_row <= src2.rows - src1.rows; start_row++){
   // "<=" so that the last position at which src1 still fits is tried as well.
   for (int start_col = 0; start_col <= src2.cols - src1.cols; start_col++){
      // Number of mismatching pixels at this position; counting stops at 20.
      int treshold = 0;
      for (int row = 0; row < src1.rows && treshold < 20; row++){
         for (int col = 0; col < src1.cols && treshold < 20; col++){
            if (src1.at<uint8_t>(row, col) != src2.at<uint8_t>(start_row + row, start_col + col)) treshold++;
         }
      }
      if (treshold < 20) YouFoundItCongards(start_row, start_col);
   }
}

But as you can see, it has 4!!!! nested loops.
A threshold is a good idea, but imagine that your search image has a few black lines at the start. Then it will try to match at every black point of the source image.
The main idea of how I solve this is TwoPhaseSearch();
In the 1st phase we search for a few KEY points. In the simplest case they can be defined manually, or taken from some grid over the src1 image.
And when we find them, we then compare the whole image at that position.
As you can see, you could also write 3PhaseSearch() or even more )))) But 2 phases have been enough so far.

Statistics: Posted by nefton — Mon Aug 13, 2018 8:32 am

Re: screen scraping

2018-08-12T08:47:20+00:00

Quote:

Im just curious about speed.

- What's stopping you timing it and comparing the time to your budget? If you need it to be faster:
- - take a look at https://en.wikipedia.org/wiki/String-se ... _algorithm and its references. I know of someone who used KMP for your problem
- - Seems to me you could speed things up a lot by initially comparing a subset of the "needle" that is known to be statistically improbable in the "haystack".
- - Image recognition is a well studied problem. DNNs are overkill for your problem but I reckon there must be older techniques suitable for you out there. https://www.quora.com/How-do-image-reco ... ithms-work

Statistics: Posted by spears — Sun Aug 12, 2018 8:47 am

Re: screen scraping

2018-08-11T10:18:09+00:00

im writing a function that checks if an image is located in a bigger image (with a search region passed as a parameter)

is this ok? (it works)

Code:

// this function returns x,y coordinates of the location of target inside a region of src
   public Point findImageRegion(BufferedImage src,BufferedImage target,int startx,int starty, int endx,int endy) {
      Point p = new Point(0,0);
      
      for(int x=startx;x         
         // target cant be found because there is not enough room in the region anymore
         if ( (x +target.getWidth()) > endx  ) {
         
            break;
         }
      
         for(int y=starty;y            
            // target cant be found anymore, because there is not enough room in the region
            if ( ( y + target.getHeight()) > endy) {

               break;
            }
            
            if ( this.isImageHere(src, target, x, y)) {
               return new Point(x,y);
            }
            
         }
         
      }   
      return p;
   }

Code:

   // returns true if target is in src starting from topleftx,y
   private boolean isImageHere(BufferedImage src,BufferedImage target,int topleftx,int toplefty) {
      
      int errors = 0;
      for(int x=0;x         
         
         for(int y=0;y            
            if ( src.getRGB(topleftx+x,toplefty+y) != target.getRGB(x, y)) {
               errors++;
            }
            
            if ( errors > PIXEL_ERROR_THRESHOLD) {
               return false;
            }
            
         }
      }
      

      return true;
   }

So this does work. Im just curious about speed. I have little experience in that department....

Statistics: Posted by Jannus — Sat Aug 11, 2018 10:18 am

Re: screen scraping

2018-08-08T19:11:13+00:00

Still got a 100% recognition . I only had to add a single mask for digit '1' (there was also a '1' with a width of 4 instead of 5). Tomorow i will test a lot more stacks, cause today most of my work has been with this live result scraper viewer (see Screenshot). This way i can easily check if all stacks are recognized correctly during live play. Instead of gathering screenshot after screenshot and check afterwards.

Done for today. Peace.

Statistics: Posted by Jannus — Wed Aug 08, 2018 7:11 pm

Re: screen scraping

2018-08-08T12:36:52+00:00

got it. Thanks! Yeah got now 30 different stacks classified with 0 errors. Got no samples anymore So first i am going to play some manual poker and gather some more screenshots

Statistics: Posted by Jannus — Wed Aug 08, 2018 12:36 pm

Re: screen scraping

2018-08-08T11:59:51+00:00

Jannus wrote:

I don't understand what you mean by worrying about noise. What noise is this?

You have only a few screenshots. With your binarization algorithm one "7" differs from another "7".
What is this difference? It is some "new" point that appears near the "7". You cannot predict this. And if one point appears on the right side of a "7" and another on the left side of a "6", there will be no empty line between them, and you cannot separate them.
That is why I said that your algorithm works on the "edge of working".
But if it already works on, for example, 100 stacks with 100% accuracy, then let it keep working.

Statistics: Posted by nefton — Wed Aug 08, 2018 11:59 am

Re: screen scraping

2018-08-08T11:45:08+00:00

@nefton

Thanks! I run the bot on a linux machine. This machine is running virtualbox (windows 7) and here the poker client resides (and only the poker client). I take a screenshot in linux (also capturing the desktop of the virtual box). So i think
that is a pretty save solution. Is it not?

You are right about the scaling image i uploaded. Those were not correct. As of now, i don't scale my images. Just use the original. See how it goes. Up to now i have 100% accuracy for hole&board cards, and stack size. But i need to test it with more screenshots of course.

I don't understand what you mean by worrying about noise. What noise is this?

Statistics: Posted by Jannus — Wed Aug 08, 2018 11:45 am

Re: screen scraping

2018-08-08T06:23:42+00:00

1. You show the original image as "true" black and white, as you say. I call it binary.
But when you show it at x4 and x10 there are smooth edges. That is not correct. I think you just scaled it in Paint or something similar.
My advice is to write your own simple function that scales x10 correctly.

2. Your solution will work. And it will be very fast, don't worry about that. Even more, I'd say it will be the fastest solution.
But it will work on the "edge of working". Add some noise and it will fail.
When you compare your digit with a template you get a simple integer: the number of points that do not match.
And there will be only a very fine line between "8", "0" and "6" in their different variations.
You can solve it by adding more than 1 template for "6", for example. Let's say you will have 4 "6" templates.
I did that too, a very long time ago ))
But in my opinion it is the wrong and a bad way.
It is neither hard nor time-consuming to get a greyscale image instead of a binary one. And doubling the size solves many problems with differing "points". Then, if you have a large, beautiful greyscale image of your digit, you need only one beautiful template for each digit. And the compare function will give you excellent results, not "on the edge of working".

3. For now you take a screenshot of your poker client. But soon you will decide that your bot is very easy for the poker client to discover. So you will think about hardware solutions. And once that happens, you should be ready to work with noise, with incorrect colors, etc.

Statistics: Posted by nefton — Wed Aug 08, 2018 6:23 am

Re: screen scraping

2018-08-07T19:27:50+00:00

it seems to work.

way i do it:

1. create black and white masks for all the digits. in my case digits have always height of 14. The width varies from 2, 5,7,8,9.

2. extract all possible digit regions from the stack (based on vertical black colums seperating digits)

for each digit region:
- if region size is 2, the digit is a '.'
- if region size is 5 the digit is always a 1. (other digits are always wider)
- if region size is 7: try to match vs 7 width masks -> pick best match
- if region size is 8: try to mach vs 8 width masks -> pick best match
- if region size is 9: try to match vs 9 width masks -> pick best match

Statistics: Posted by Jannus — Tue Aug 07, 2018 7:27 pm

Re: screen scraping

2018-08-07T15:02:48+00:00

ive studied a lot of digits. And all digits have the same height (14 pix). Which is nice..but the width seems to be different.

All except '1' and '4' the width is either 7 or 8. I have encountered at least 3 different digits of 7. But they all were 8x14.

My idea is to just make masks (one for each number,,,and one for each number which occurs at a different size)..

so because 7 always is 8x14 i wil make one mask. But 9 is either 7x14 and 8x14. So I will make 2 masks for 9. Then check
which mask has the highest match with actual screenshot. That mask is the winner. Or is this approach too slow? And maybe not accurate?
Cause i dont know how many other versions of the digits there are cause lack of sample size?

Statistics: Posted by Jannus — Tue Aug 07, 2018 3:02 pm

Re: screen scraping

2018-08-07T12:12:48+00:00

Hi,

Am i correct that your code just transforms the stack image for better processing later?

In my java program, i have transformed the stack image to a true black and white image. Below are 3 results.

1. original size
2. x4
3. x10

Can i use these? There seem to be black horizontal lines to seperate the digits. Or do i need to use the filter you suggested?

Second question. I don't understand why we need to scale the image up. I mean, the black horizontal lines will be there in the original x1 image as well.

Statistics: Posted by Jannus — Tue Aug 07, 2018 12:12 pm

Re: screen scraping

2018-08-07T11:41:23+00:00

@nefton

Thanks! I am going to study all your posts before i comment again. Its a lot to process for me. I am not familiar with manipulating images and scraping at all (i never heard of filters for instance ).

I did finish the easy part. My hole cards and the community cards are recognized 100% correctly now. So now its time to get the stack sizes recognized..

Statistics: Posted by Jannus — Tue Aug 07, 2018 11:41 am

Re: screen scraping

2018-08-06T18:16:01+00:00

It is very good that, with filters, we can separate the digits by vertical lines. Otherwise it gets very difficult. (I hope the PS engineers don't read this forum )

Statistics: Posted by nefton — Mon Aug 06, 2018 6:16 pm

Re: screen scraping

2018-08-06T17:52:02+00:00

As we can see in your stack sample, the digits and the decimal point can easily be separated by vertical lines.
That is very good because it is very fast!
I think anybody can do this.
The next steps may differ.
The easiest to implement is to just cut out the digits and the point, resize each to some fixed size like 20x30,
and then compare it with a template by calculating the average per-point difference.

Statistics: Posted by nefton — Mon Aug 06, 2018 5:52 pm

Re: screen scraping

2018-08-06T17:44:47+00:00

cant add more than 3 attachment so filter 3 and the code here

Code:

#include <cstdint>
#include <cstdlib>
#include <iostream>

#include "opencv2/opencv.hpp"
#include "common_ocr_functions.h"

int main() {

   cv::Mat source = cv::imread("D:/PROJECTS/opencv_sample/example.png");

   //cv::imshow("source", source);
   //cv::waitKey();

   //ShowLarge10x("source", source, true);

   cv::Mat filter1(source.rows * 2, source.cols * 2, CV_8UC1);

   for (int row = 0; row < source.rows; row++) {
      for (int col = 0; col < source.cols; col++) {
         cv::Vec3b point = source.at(row, col);
         uint8_t b = point[0];
         uint8_t g = point[1];
         uint8_t r = point[2];
         uint8_t h, s, v;
         RGB2HSV(r, g, b, &h, &s, &v);
         filter1.at(row * 2 + 0, col * 2 + 0) = v;
         filter1.at(row * 2 + 1, col * 2 + 0) = v;
         filter1.at(row * 2 + 0, col * 2 + 1) = v;
         filter1.at(row * 2 + 1, col * 2 + 1) = v;
      }
   }

   //ShowLarge10x("filter1", filter1, true);

   cv::Mat filter2(source.rows * 2, source.cols * 2, CV_8UC1);

   cv::GaussianBlur(filter1, filter2, cv::Size(4, 4), 0.0);

   //ShowLarge10x("filter2", filter2, true);

   cv::Mat filter3(source.rows * 2, source.cols * 2, CV_8UC1);

   for (int row = 0; row < filter2.rows; row++) {
      for (int col = 0; col < filter2.cols; col++) {
         uint8_t v = filter2.at(row, col);
         filter3.at(row, col) = 0;
         if (v > 100) filter3.at(row, col) = v;
      }
   }

   ShowLarge10x("filter3", filter3, true);

   std::cout << "\n\n";
   std::system("pause");
   return 0;
}

Statistics: Posted by nefton — Mon Aug 06, 2018 5:44 pm

Re: screen scraping

2018-08-06T18:01:59+00:00

The main challenge in recognition is the filters. Good filters are 95% of successful recognition.
Here is the source (10x) and 3 filters.
1st: just makes a black/white image from the original
2nd: Gaussian blur (it is very fast, but my advice is to write it yourself (a few lines of code))
3rd: simple threshold

The important thing is that I doubled the size of the image. This was done so that the Gaussian blur works well, and so that separating the digits works well and fast too. In my experience any doubling or tripling of small images is very fast; it is faster than the time I can measure

Statistics: Posted by nefton — Mon Aug 06, 2018 5:42 pm

Re: screen scraping

2018-08-06T16:36:44+00:00

Perfect! Here is your stack.
It will be hard to recognize..
I will write my steps here.

Statistics: Posted by nefton — Mon Aug 06, 2018 4:36 pm

Re: screen scraping

2018-08-06T15:27:08+00:00

i dont really understand why you need an image of the stack. but there it is. sorry if i do not understand what you want, new to scraping

https://ibb.co/n9X0mK

Statistics: Posted by Jannus — Mon Aug 06, 2018 3:27 pm

Re: screen scraping

2018-08-06T09:41:31+00:00

nefton wrote:

Show what you cannot recognize.

Jannus wrote:

So i know for instance where (x,y coordinates) the stacksizes of all the players
are displayed on screen. If you dont use tesseract to recognize each digit of the stacksize. What method
do you use instead? For instance say my own stack is 10.45

I asked you to show me an example of a stack, as a *.png image (not jpeg).
And a "stack" has not only x,y coordinates but also a height and width

Statistics: Posted by nefton — Mon Aug 06, 2018 9:41 am

Re: screen scraping

2018-08-06T08:10:23+00:00

If you don't use Tesseract, then you you have to use some other off the shelf OCR solution or write your own. Writing your own will take longer. You might be able to make it faster by specialising on particular fonts and vocabularies, but there is quite a lot of work to do that. For you to have a realistic chance of completing this project trying out Tesseract should not be a big deal.

Statistics: Posted by spears — Mon Aug 06, 2018 8:10 am

Re: screen scraping

2018-08-05T20:41:39+00:00

HontoNiBaka wrote:

Jannus wrote:

Hi,

I tried my approach (scan for images of the digits in the screenshot) but as some has pointed out that does not work. I will try
tesseract next.

I have did some google searching..but its not entirely clear to me.... you have tesseract, tess4j, jTessBoxEditor. How do all these programs fit in to the big picture? What do i need?

I have a training set containing of several files for each digit.

Thanks!

Ps. do you also use tesseract to recognize cards? Or just image searching those?

Tesseract is the name of the project and the C library. Tess4j is a Java wrapper around the C library, you can think of it as Tesseract for Java, so if you use Java you will need tess4j. JTessBoxEditor apparently lets you train the Tesseract model with your own images; I have always only used the pretrained models which can be downloaded from the Tesseract site.

Thanks for explaining this. Very helpful. But nefton said that Tesseract should not be used because it's too slow. What's a faster and better alternative according to you guys?

Statistics: Posted by Jannus — Sun Aug 05, 2018 8:41 pm

Re: screen scraping

2018-08-05T08:37:01+00:00

To get the best out of Tesseract check out old posts https://www.google.co.uk/search?q=site% ... e&ie=UTF-8

Statistics: Posted by spears — Sun Aug 05, 2018 8:37 am

Re: screen scraping

2018-08-05T06:01:58+00:00

Jannus wrote:

Statistics: Posted by HontoNiBaka — Sun Aug 05, 2018 6:01 am

Re: screen scraping

2018-08-04T14:24:45+00:00

@nefton

Thanks for your reply. I understand all that. So i know for instance where (x,y coordinates) the stacksizes of all the players
are displayed on screen. If you dont use tesseract to recognize each digit of the stacksize. What method
do you use instead?

For instance say my own stack is 10.45 I assume your bot has to recognize each digit seperately. And then combine those
digits to create the stack size.

Any help appreciated

Thanks!

Statistics: Posted by Jannus — Sat Aug 04, 2018 2:24 pm

Re: screen scraping

2018-08-04T14:09:52+00:00

To recognize a poker table in the image you must do several steps.
1. Find your table's position and its size.
2. Decide whether it is a table you want to recognize. (maybe you do not want to recognize all tables)
3. Determine how many people sit there and where. (2, 3, 4, 6, 9 player table or something else)
4. Depending on the table size and table type (6 players for example) you need to know where your nicks, stacks, cards etc. are.
And only then do you need to recognize each of them.
Tesseract is very slow and I don't know why one would use it.
Show what you cannot recognize. (stack, nick, card etc.)

Statistics: Posted by nefton — Sat Aug 04, 2018 2:09 pm

Re: screen scraping

2018-08-04T13:22:19+00:00

Statistics: Posted by Jannus — Sat Aug 04, 2018 1:22 pm

Re: screen scraping

2018-04-23T20:45:50+00:00

Yes like HontoNiBaka said each site will be different in font style and text layout. Some will align the text left, some right, some will center the text and if it's centered the digits will move coordinates based on the length of the stack size.

Regarding Anti-Aliasing aka ClearType, I've been looking into possibly disabling AA while I capture my initial image and then immediately re-enabling it. I'm not sure if this is a good idea or not, but here are the two .reg instructions to enable and disable them

Disable ClearType and antialiasing

Code:

; Turns font smoothing off for the current user: FontSmoothing is set to the
; string "0" and FontSmoothingType to DWORD 0 (described in the post as
; disabling ClearType and antialiasing).
[HKEY_CURRENT_USER\Control Panel\Desktop]
"FontSmoothing"="0" 
"FontSmoothingType"=dword:00000000 

Enable cleartype and antialiasing

Code:

; Turns font smoothing back on for the current user: FontSmoothing is set to
; the string "2" and FontSmoothingType to DWORD 2 (described in the post as
; enabling ClearType and antialiasing).
[HKEY_CURRENT_USER\Control Panel\Desktop]
"FontSmoothing"="2"
"FontSmoothingType"=dword:00000002

to the best of my knowledge in c# it would be something like this

Code:

// Temporarily switches font smoothing off in the registry, grabs a screenshot,
// and then switches font smoothing back on.
// NOTE(review): running applications may not pick up the registry change
// until they are notified of the settings change - verify on the target system.

//Set Reg Key (opened writable; disposed automatically by the using block)
using (RegistryKey key = Registry.CurrentUser.OpenSubKey(@"Control Panel\Desktop", true))
{
    // OpenSubKey returns null when the key does not exist.
    if (key == null)
    {
        throw new System.InvalidOperationException(@"Could not open HKEY_CURRENT_USER\Control Panel\Desktop for writing.");
    }

    try
    {
        //Disable AA & ClearType
        key.SetValue("FontSmoothing", 0, RegistryValueKind.String);
        key.SetValue("FontSmoothingType", 0, RegistryValueKind.DWord);
        //Grab Screenshot
        YouScreenshotMethod();
    }
    finally
    {
        //Enable AA & ClearType - runs even if the screenshot call throws,
        //so the desktop is never left with smoothing disabled.
        key.SetValue("FontSmoothing", 2, RegistryValueKind.String);
        key.SetValue("FontSmoothingType", 2, RegistryValueKind.DWord);
    }
}

For getting centered text one character at a time, one idea I could think of to try and work around the locations would be to grab the rectangle for the entire stack size, then run color detection pixel by pixel (pretty sure the best way to do this is to convert the image to a byte array) to get a grid of which pixels are text or background. Then parse through all the coordinates and find the first pixel that's the text color (the lowest X value that matches); that would be where your first digit starts, and you can make your first single-character box (be sure to subtract one from the X value). Then, knowing the size of each character, you can keep making boxes, checking each one for the text color until you get a box that has no text color, which means the previous box was our last character.

Also you could possibly try to find the memory address for each stack using memory scanning software like cheat engine, but each time the program updates, the memory addresses will change and you would have to re-find them. but when using OCR once you have it working the locations more than likely wont change unless a major update occurs and they revamp their table layout.

Again not an expert, I just enjoy playing with this kind of stuff. Double check any information for yourself, good luck!

Statistics: Posted by Vagrant — Mon Apr 23, 2018 8:45 pm

Re: screen scraping

2018-04-23T17:09:54+00:00

This works on some sites, but not on others. On some sites the digits will look different in a different context, for example when they are next to a different digit, so the 1 in 10 will look different from the 1 in 11.
The easiest sites are those that don't use anti-aliasing for the digits; the technique will work there.

Statistics: Posted by HontoNiBaka — Mon Apr 23, 2018 5:09 pm

Re: screen scraping

2018-04-23T11:46:32+00:00

Statistics: Posted by Jannus — Mon Apr 23, 2018 11:46 am

Re: screen scraping

2018-04-20T03:39:24+00:00

Hi, actually made an account just to join in this discussion.

I was working on a project and lost my data so im starting from scratch. my old project and current one both followed very similar logic as you are.

I used Tesseract for my OCR and I was so close to having it perfect, but the small fonts would get me. I haven't gotten back to that part of the code as I just started the new version this morning. Just like spears replied, I have been told that increasing the image size and sharpening / adding contrast will help. You can also convert to pure black and white, but sometimes this was more hurtful because some of the spaces turn into part of the characters. I'm sure there's a way to fix that; I'm sort of new to processing images in C#.

heres a summary of what Im going to be doing on my new scraper(i use c#/.NET), FYI im no expert this is just what made sense to me Im always looking for better methods

1. capture screen
2. use imagemagick to sharpen the image( may also convert to pure B+W)
3. cut out the areas i need to use for OCR, bets, pot
4. Enlarge each section for better OCR results(I was told 400-500% is a good place to start)
5. save each section as a temp image to use for training in tesseract(once i have the accuracy dialed in I wont be saving temp images)
6. parse the cropped image with tesseract and compare results. if the results dont match continue to train tesseract . I have been using jTessBoxEditor for my training, its very easy to use.

again with out enlarging, sharpening, or converting to B/W i was able to get very good but not perfect(mostly with punctuation) accuracy in tesseract, Im hopeful the added steps will get me to where I want to be.

I hope any of this helps you out, even though this is really not that detailed, but if you want to compare notes let me know I'm always up for sharing information.

good luck.

edit: as far as the window size is concerned, I detect my windows by partial window title .Contains() in c#, then since the default size is known i resize the window to the default size so I am sure my coordinates are correct.

as far as the colors, AFAIK unless you are altering the colors sent to the screen red is red etc. Im pretty sure rgb 255,0,0 isnt interpreted any different between operating systems. also I use image comparison rather than pixel detection in those cases.

EX: Is the check box checked? take a cropped pic of the box checked, look within the coordinates of the box with some padding,does the box checked image exist within the coordinates? yes? box is checked. no? box is not checked.

I also use this logic for card detection.I have a saved image for each card value 2-10,J,Q,K,A as well as a saved image for each suit. I scan my hand then the community cards 1 by 1 to identify value and suit.

I feel like im rambling now, if i didnt make anything clear or if im wrong about something let me know

Statistics: Posted by Vagrant — Fri Apr 20, 2018 3:39 am

Re: screen scraping

2018-04-17T04:35:02+00:00

I've not done it myself but I've read that Tesseract can be made to work quite well by scaling up small characters, increasing contrast, and converting to greyscale.

Statistics: Posted by spears — Tue Apr 17, 2018 4:35 am

Re: screen scraping

2018-04-15T19:35:51+00:00

Quote:

but if you keep all the poker tables the same size..it could work couldnt it? (i dont understand your 3 point)

Sorry for my bad english.
For example: if you need to scrape the stack size "$ 200.57" you will have many problems because the pixels will change, and it is possible that the same digits have different pixels at different "times".

Quote:

really dont know if the speed is going to be an issue.

I think so. But try...

Statistics: Posted by Timmy1992 — Sun Apr 15, 2018 7:35 pm

Re: screen scraping

2018-04-15T19:04:03+00:00

but if you keep all the poker tables the same size..it could work couldnt it? (i dont understand your 3 point)

really dont know if the speed is going to be an issue.

I am running a linux host...with virtual box windows 7. then scrape the virtual windows 7 system.

Statistics: Posted by Jannus — Sun Apr 15, 2018 7:04 pm

Re: screen scraping

2018-04-15T19:29:26+00:00

Jannus wrote:

about scraping the stacks:

is it also possible to create images of all the digits used in the poker client. and then just search for those images and construct
the stacksize that way...or does this take too much time?

I think this is the wrong way because:
1) if you resize the room you can't use your RGB palette
2) the computation is much too slow
3) it is possible that the palette of screen colors generated by the Windows/Linux system is "different" for some reason

For me the best choice is a neural network with normalization of the input parameters (in other words: OCR, but the standard "OCR" software has problems with smaller characters).

Statistics: Posted by Timmy1992 — Sun Apr 15, 2018 6:56 pm

Re: screen scraping

2018-04-15T17:30:48+00:00

Statistics: Posted by Jannus — Sun Apr 15, 2018 5:30 pm

screen scraping

2018-04-15T15:50:40+00:00

hi,

I am starting to code a scraper. I code on a Linux machine so I can't use the OpenScrape project. I searched this forum for how to start, but I wanted
to check with you guys first if my approach is okay.

- cards + buttons + checkboxes: take sample images of these, and then make a screenshot and look for these images in certain regions
- names + stacks: use OCR for these. Tesseract and openocr seem to be the choices.

Is this the way to go scrape wise?

Another option is to run OpenScrape on a windows machine. Create a tablemap using that program. And then parse that table map in
my bot code.

Statistics: Posted by Jannus — Sun Apr 15, 2018 3:50 pm