diff --git a/.devcontainer.json b/.devcontainer.json deleted file mode 100644 index d026a95..0000000 --- a/.devcontainer.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "name": "EasyOCR-cpp", - "build": { - "dockerfile": "./Dockerfile", - "context": ".", - "args": {} - }, - "runArgs": [ - "--name=EasyOCR-cpp", - "--gpus", - "all" - ], - "workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/ocr-c++/EasyOCR-cpp,type=bind", - "workspaceFolder": "/workspaces/ocr-c++", - "customizations": { - "vscode": { - "extensions": [ - "ms-vscode.cmake-tools", - "ms-vscode.cpptools", - "ms-vscode.cpptools-extension-pack", - "ms-vscode.cpptools-themes", - "PKief.material-icon-theme", - "twxs.cmake" - ] - } - } -} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index fa90dfe..8cf470c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,24 +1,20 @@ -cmake_minimum_required(VERSION 3.20 FATAL_ERROR) -project(torchTest) +cmake_minimum_required(VERSION 3.14) +project(openvinoTest) +# Set the C++ standard set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_STANDARD_REQUIRED True) -# Add libtorch folder if built from dockerfile -if (DEFINED ENV{build_from_docker_file}) - get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) - list(APPEND CMAKE_PREFIX_PATH ${PARENT_DIR}/thirdparty/libtorch) -endif() -find_package(Torch REQUIRED) +# Make openvinoTest a startup project in MSVS +set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT openvinoTest) -# Find OPENCV automatically if built from dockerfile -if (DEFINED ENV{build_from_docker_file}) - find_package(OpenCV 4 REQUIRED) - find_package(Threads REQUIRED) -# Else force the path to opencv here if executed on Windows -else () - find_package(OpenCV REQUIRED PATHS C:/Users/sasso/Downloads/new/opencv/build ) -endif() +# Find OpenVINO package (use the latest version's simplified package find) +find_package(OpenVINO REQUIRED COMPONENTS Runtime) # 
Load only the inference runtime component +set(OpenCV_DIR "C:/OpenCV/opencv/build") +find_package(OpenCV REQUIRED) + +# Include OpenVINO and OpenCV directories +include_directories(${CMAKE_SOURCE_DIR}/include ${OpenVINO_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") # Get all character text files from the 'lang' directory @@ -26,7 +22,6 @@ file(GLOB LANG_FILES "lang/*") file(GLOB MODELS "models/*") - # Set the source file path set(SOURCE_FILE_PATH "test.jpg") @@ -48,23 +43,16 @@ foreach(FILE_PATH ${MODELS}) file(COPY ${FILE_PATH} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) endforeach() -add_executable(torchTest torchExample.cpp - src/TorchModel.cpp - src/CRAFT.cpp - src/CRNN.cpp) -target_include_directories(torchTest PUBLIC "include/") -target_link_libraries(torchTest ${OpenCV_LIBS} ${TORCH_LIBRARIES} ${MKL}) -set_property(TARGET torchTest PROPERTY CXX_STANDARD 17) +# Add the executable for your application +add_executable(${PROJECT_NAME} OpenvinoExample.cpp + src/OpenvinoModel.cpp + src/CRAFT.cpp + src/CRNN.cpp) + +# Link OpenVINO and OpenCV libraries to your target +target_link_libraries(${PROJECT_NAME} openvino::runtime ${OpenCV_LIBS}) -# The following code block is suggested to be used on Windows. -# According to https://github.com/pytorch/pytorch/issues/25457, -# the DLLs need to be copied to avoid memory errors. 
-if (MSVC) - set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT torchTest) - file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") - add_custom_command(TARGET torchTest - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${TORCH_DLLS} - $) -endif (MSVC) \ No newline at end of file +# Print diagnostic information (Optional, useful for troubleshooting) +message(STATUS "OpenVINO include dirs: ${OpenVINO_INCLUDE_DIRS}") +message(STATUS "OpenCV include dirs: ${OpenCV_INCLUDE_DIRS}") +message(STATUS "OpenCV libraries: ${OpenCV_LIBS}") diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 053a6e0..0000000 --- a/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 -WORKDIR /workspaces/ocr-c++ - -ENV build_from_docker_file 1 - -# install generic tools -RUN apt update && apt -y dist-upgrade && \ -DEBIAN_FRONTEND="noninteractive" apt install -y wget build-essential cmake \ -gdb git git-lfs libssl-dev pkg-config unzip libopencv-dev python3-opencv - -# download libtorch -RUN mkdir -p /workspaces/ocr-c++/thirdparty -RUN wget https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.1%2Bcu118.zip -O /workspaces/ocr-c++/thirdparty/libtorch.zip -RUN unzip /workspaces/ocr-c++/thirdparty/libtorch.zip -d /workspaces/ocr-c++/thirdparty/ && rm /workspaces/ocr-c++/thirdparty/libtorch.zip - -# keep container running after start -ENTRYPOINT ["tail"] -CMD ["-f","/dev/null"] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/OpenvinoExample.cpp b/OpenvinoExample.cpp new file mode 100644 index 0000000..ead294f --- /dev/null +++ b/OpenvinoExample.cpp @@ -0,0 +1,52 @@ +#include "OpenVINOModel.h" +#include "CRAFT.h" +#include "CRNN.h" +#include +#include + +int main() +{ + CraftModel detection; + CRNNModel recognition; + + std::string det = "CRAFT-detector.xml"; + std::string rec = "recognition_model.xml"; + std::string filePath = "test.jpg"; + std::string device = "CPU"; + + auto check_det = detection.loadModel(det, device); + auto check_rec = recognition.loadModel(rec, device); + + cv::Mat matInput = detection.loadMat(filePath, false, true).clone(); + HeatMapRatio processed = detection.resizeAspect(matInput); + cv::Mat clone = processed.img.clone(); + cv::Mat grey = processed.img.clone(); + grey.convertTo(grey, CV_8UC1); + cv::cvtColor(grey, grey, cv::COLOR_BGR2GRAY); + clone.convertTo(clone, CV_8UC3); + ov::Tensor input = detection.preProcess(processed.img.clone()); + auto ss = std::chrono::high_resolution_clock::now(); + std::vector dets = detection.runDetector(input, true); + std::vector results = recognition.recognize(dets, grey); + auto ee = std::chrono::high_resolution_clock::now(); + auto difff = ee - ss; + int count = 0; + for (auto x : dets) + { + rectangle(clone, x.topLeft, x.bottomRight, cv::Scalar(0, 255, 0)); + putText(clone, std::to_string(count), (x.bottomRight + x.topLeft) / 2, cv::FONT_HERSHEY_COMPLEX, .6, cv::Scalar(100, 0, 255)); + count++; + + } + for (auto& result : results) + { + std::cout << "LOCATION: " << result.coords.topLeft << " " << result.coords.bottomRight << std::endl; + std::cout << "TEXT: " << result.text << std::endl; + std::cout << "CONFIDENCE " << result.confidence << std::endl; + std::cout << "################################################" << std::endl; + } + std::cout << "TOTAL INFERENCE TIME " << std::chrono::duration (difff).count() << " ms" << std::endl; + + return 0; +} + diff --git a/README.md b/README.md index 466ffaf..5215f7e 
100644 --- a/README.md +++ b/README.md @@ -1,101 +1,34 @@ -# EasyOCR-cpp -![alt text](https://github.com/ksasso1028/EasyOCR-cpp/blob/main/output-heatmap.jpg) -### Custom C++ implementation of [EasyOCR](https://github.com/JaidedAI/EasyOCR) -### Built and tested on Windows 11, libtorch 1.13+cpu and OpenCV 4.6 +# EasyOCR-cpp OpenVINO -This C++ project implements the pre/post processing to run a OCR pipeline consisting of a text detector [CRAFT](https://arxiv.org/abs/1904.01941), and a CRNN based text recognizer. Unlike the EasyOCR python which is API based, this repo provides a set of classes to show how you can integrate OCR in any C++ program for maximum flexibility. The torchExample.cpp main program highlights how to utilize all elements of the EasyOCR-cpp pipeline. Because a test program is only provided, make sure to configure your input image within torchExample.cpp if you only plan to utilize the test program. +### Custom C++ implementation of [EasyOCR](https://github.com/JaidedAI/EasyOCR) with [OpenVINO](https://github.com/openvinotoolkit/openvino) backend +### Built and tested on Windows 11, OpenVINO 2024.4.0 and OpenCV 4.6 -**thrown together quickly within a week so many debug comments** :) +This C++ project implements the pre/post processing to run an OCR pipeline consisting of a text detector [CRAFT](https://arxiv.org/abs/1904.01941), and a CRNN based text recognizer. Unlike the Python EasyOCR package, which is API based, this repo provides a set of classes to show how you can integrate OCR in any C++ program for maximum flexibility. The OpenvinoExample.cpp main program highlights how to utilize all elements of the EasyOCR-cpp pipeline. Because only a test program is provided, make sure to configure your input image within OpenvinoExample.cpp if you only plan to utilize the test program. 
-Libtorch is being utilized with an in-house class I usually use for C++ inference [TorchModel](https://github.com/ksasso1028/EasyOCR-cpp/blob/main/src/TorchModel.cpp), and OpenCV for the pre/post processing steps. -The TorchModel class can easily adapted to run inference on most Pytorch models converted to [TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html). Provides some handy functions to pre process opencv::Mat and handle device usage (GPU,CPU). Great starting point for C++ based inference for Pytorch Models. +## Setup with MSVS -Some features that have yet to be implemented: - -- [ ] beam search, only implemented greedy decoding -- [ ] .txt/.pdf output -- [ ] exact bounding box merge alg from EasyOCR, opted for custom one which is less complex -- [ ] support for other languages, atm only english is supported. - - -### If you would like to support feel free to make a PR, or a issue if you are having trouble. - -## Setup with Docker (Thanks to [@BBO-repo](https://github.com/BBO-repo) ) - -### Docker container with VSCode -To build and run through visual studio code, make sure the remote development extension is installed.
+### Dependencies +Install [OpenVINO](https://docs.openvino.ai/2024/get-started/install-openvino.html?PACKAGE=OPENVINO_BASE&VERSION=v_2024_4_0&OP_SYSTEM=WINDOWS&DISTRIBUTION=ARCHIVE) -The repository provides a .devcontainer.json which should allow you to directly reopen the cloned folder in a docker container using the bottom left icon and select `Reopen in container`as illustrated below -![vs-open-in-container](images/vs-open-in-container.png) +Use the OpenCV Windows installer and unzip (v4.6) - > [OpenCV libs](https://opencv.org/releases/) -When the container is finally built, you can open a terminal and your working folder should be `/workspace/ocr-c++` as mentionned `.devcontainer.json` the field `"workspaceFolder": "/workspaces/ocr-c++"`. You should have the two folders: -- `thirdparty` containing the automatically downloaded libtorch library -- `EasyOCR-cpp` containing the code +Make sure to change the location in [CMakeLists.txt](https://github.com/avbelova/EasyOCR-cpp/blob/afc2090b6d32dda4461d3a361abb7eaa80116ff9/CMakeLists.txt#L11) for OpenCV to point to your OpenCV build dir -You can proceed as usual to build the cmake project +Set up the OpenVINO environment: ``` -mkdir -p /workspaces/ocr-c++/EasyOCR-cpp/build -cd /workspaces/ocr-c++/EasyOCR-cpp/build -cmake .. -make +"C:\Program Files (x86)\Intel\openvino_2024.4.0\setupvars.bat" ``` -The binaries should be available in `build` folder, to run the example application just run: +Set up the OpenCV environment: ``` -cd /workspaces/ocr-c++/EasyOCR-cpp/build -./torchTest +C:\OpenCV\opencv\build\setup_vars_opencv4.cmd ``` +Create a build directory within the repo, cd to it and run cmake -### Docker container with command line -Considering that the repository was cloned in the folder `/my/working/directory/EasyOCR-cpp` for illustration. -#### Build the docker container -Build a docker image named for example `ocr_engine` from the `Dockerfile` inside the `/my/working/directory/EasyOCR-cpp` directory.
-``` -docker build --progress=plain -t ocr_engine /my/working/directory/EasyOCR-cpp -``` -Build a container named for example `EasyOCR-cpp` from the `ocr_engine` generated image -``` -docker create --name EasyOCR-cpp --mount type=bind,source=/my/working/directory/EasyOCR-cpp,target=/workspaces/ocr-c++/EasyOCR-cpp ocr_engine:latest -``` -You can now start, stop or restart the generated `EasyOCR-cpp` container.
-From a terminal, enter the following command to start the container and getting a terminal inside the container with an interactive mode -``` -docker container start EasyOCR-cpp -docker exec -ti EasyOCR-cpp bash -``` -You should have a bash terminal inside the container, entering `pwd` and `ls` commands should output the following: -``` -pwd - /workspaces/ocr-c++ -ls - thirdparty EasyOCR-cpp -``` -Similarly to previously to build cmake project, go to source code folder `cd /workspaces/ocr-c++/EasyOCR-cpp`, make a folder `build` and go inside, then `cmake ..` and `make` ``` -cd /workspaces/ocr-c++/EasyOCR-cpp mkdir build cd build -cmake .. -make -``` -Then similarly, the binaries should be available in `build` folder, to run the example application just run: -``` -cd /workspaces/ocr-c++/EasyOCR-cpp/build -./torchTest -``` - -## Setup to run without Docker - -### Dependencies -Click to Download libtorch - > [download](https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-1.13.1%2Bcpu.zip) - -Use OpenCV Windows installer and unzip (v4.6) - > [OpenCV libs](https://opencv.org/releases/) - -Make sure to change the location in the [Makefile](https://github.com/ksasso1028/EasyOCR-cpp/blob/e9311ee3f45b59c2709be3a98a04b48c215a845b/CMakeLists.txt#L7) for OpenCV to point to your OpenCV build dir - -Create a build directory within the repo, cd to it and run -``` -cmake -DCMAKE_PREFIX_PATH= .. +cmake .. ``` @@ -103,10 +36,34 @@ This will generate a solution within the build folder you can open up in Visual ### Running -Configure your input image [here](https://github.com/ksasso1028/EasyOCR-cpp/blob/e9311ee3f45b59c2709be3a98a04b48c215a845b/torchExample.cpp#L25). Currently the test program is using the test.jpg which comes in the repo. - -Launch from command-line, or within Visual Studio after building. 
-**Since its designed to be used in a C++ program, text is not being written to disk at the moment** An output image will be generated in the main repo dir containing an annotated version of the input image with detection bounding boxes - +Configure your recognition model, input image and inference device [here](https://github.com/avbelova/EasyOCR-cpp/blob/0754743a0128266dc624964d01d45e2147b290fe/OpenvinoExample.cpp#L13C3-L15C32). Configure a character list for your language [here](https://github.com/avbelova/EasyOCR-cpp/blob/0754743a0128266dc624964d01d45e2147b290fe/src/CRNN.cpp#L10). By default, the openvinoTest program uses the English recognition model, takes test.jpg (which comes with the repo) as the input image, and runs inference on CPU. + +Launch from command-line, or within Visual Studio after building. **Don't forget to source environment variables for both OpenVINO and OpenCV as described above** + +### Adding more languages support + +This repo contains a recognition model for English [recognition_model.xml](https://github.com/avbelova/EasyOCR-cpp/blob/openvino-integration/models/recognition_model.xml) and for the most popular European languages based on Latin symbols (German, French, Italian, Spanish, etc.) [recognition_model_latin.xml](https://github.com/avbelova/EasyOCR-cpp/blob/openvino-integration/models/recognition_model_latin.xml). Please note that for inference with OpenVINO both the .xml and .bin files are required; they should have the same name and be placed in the same folder, but in code you only need to specify the path to the .xml file. For both models there are corresponding language characters files: [english_g2_characters.txt](https://github.com/avbelova/EasyOCR-cpp/blob/openvino-integration/lang/english_g2_characters.txt) and [latin_char.txt](https://github.com/avbelova/EasyOCR-cpp/blob/openvino-integration/lang/latin_char.txt). 
+ +**If you need models for more languages, you can get them already in OpenVINO format by following these steps:** +1. Create and activate a Python virtual environment: + ``` + python -m venv env + env\Scripts\activate + ``` +2. Install a patched Python EasyOCR version: + ``` + pip install git+https://github.com/avbelova/EasyOCR.git@model-convert-and-save + ``` +3. Run EasyOCR with the needed language as usual in Python. For example, the following code gets a Chinese recognition model: + ``` + import cv2 + import easyocr + + img=cv2.imread("chinese.jpg") + reader = easyocr.Reader(['ch_sim'], gpu="ov_cpu") + result = reader.readtext(img, detail = 0) + print(result) + ``` +4. Find a recognition model in OpenVINO format in the directory from where you run EasyOCR in the previous step. +5. Don't forget to obtain a character list for your model. diff --git a/images/vs-open-in-container.png b/images/vs-open-in-container.png deleted file mode 100644 index 9a2c37d..0000000 Binary files a/images/vs-open-in-container.png and /dev/null differ diff --git a/include/CRAFT.h b/include/CRAFT.h index 682de7b..edc2427 100644 --- a/include/CRAFT.h +++ b/include/CRAFT.h @@ -1,9 +1,8 @@ +#pragma once #ifndef CRAFT_H #define CRAFT_H -#include -#include #include "string" -#include "TorchModel.h" +#include "OpenVINOModel.h" #include struct HeatMapRatio { @@ -37,16 +36,16 @@ struct pointSorter { } }; -class CraftModel: public TorchModel{ +class CraftModel : public OpenVINOModel { public: HeatMapRatio resizeAspect(cv::Mat& img); - cv::Mat normalize(const cv::Mat & img); - std::vector getBoundingBoxes(const torch::Tensor &input, const torch::Tensor& output, float textThresh = .7, float linkThresh = .4, float lowText = .4); - torch::Tensor preProcess(const cv::Mat & matInput); + cv::Mat normalize(const cv::Mat& img); + std::vector getBoundingBoxes(const ov::Tensor& input, const ov::Tensor& output, float textThresh = .7, float linkThresh = .4, float lowText = .4); + ov::Tensor preProcess(const 
cv::Mat& matInput); std::vector mergeBoundingBoxes(std::vector& dets, float distanceThresh, int height, int width); - std::vector runDetector(torch::Tensor& input, bool merge); + std::vector runDetector(ov::Tensor& input, bool merge); // stores the last computed ratio (resize/rescale) from input image. float ratio; }; -#endif +#endif \ No newline at end of file diff --git a/include/CRNN.h b/include/CRNN.h index 5f38ffc..1e89cb0 100644 --- a/include/CRNN.h +++ b/include/CRNN.h @@ -1,9 +1,7 @@ #ifndef CRNN_H #define CRNN_H -#include -#include #include "string" -#include "TorchModel.h" +#include "OpenvinoModel.h" #include "CRAFT.h" #include struct TextResult @@ -13,17 +11,19 @@ struct TextResult BoundingBox coords; }; -class CRNNModel : public TorchModel { +class CRNNModel : public OpenVINOModel { public: CRNNModel(); - std::vector recognize(std::vector& dets, cv::Mat& img, int& maxWidth); - torch::Tensor preProcess(cv::Mat& det); - torch::Tensor normalizePad(cv::Mat& processed, int minWidth); - std::string greedyDecode(torch::Tensor& input, int size); + std::vector recognize(std::vector& dets, cv::Mat& img); + ov::Tensor preProcess(cv::Mat& det); + ov::Tensor normalize(cv::Mat& processed); + std::string greedyDecode(std::vector& encoded); + ov::Tensor softmax(ov::Tensor& input, int dim); //stores the last computed ratio (resize/rescale) from input image. 
float ratio; std::vector characters; + void print_tensor(ov::Tensor& tensor); }; -#endif +#endif \ No newline at end of file diff --git a/include/OpenvinoModel.h b/include/OpenvinoModel.h new file mode 100644 index 0000000..6eee3cd --- /dev/null +++ b/include/OpenvinoModel.h @@ -0,0 +1,19 @@ +#ifndef TORCHMODEL_H +#define TORCHMODEL_H +#include "openvino/openvino.hpp" +#include "string" +#include + +class OpenVINOModel +{ +public: + OpenVINOModel(); + ~OpenVINOModel(); + bool loadModel(const std::string& modelPath, const std::string& device); + ov::Tensor predict(const ov::Tensor& input); + ov::Tensor convertToTensor(const cv::Mat& img, bool normalize = false, bool color = true); + cv::Mat convertToMat(const ov::Tensor& output, bool isFloat, bool permute, bool bgr, bool color); + cv::Mat loadMat(const std::string file, bool grey, bool rgb); + ov::CompiledModel compiled_model; +}; +#endif diff --git a/include/TorchModel.h b/include/TorchModel.h deleted file mode 100644 index 57e3d21..0000000 --- a/include/TorchModel.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TORCHMODEL_H -#define TORCHMODEL_H -#include -#include -#include -#include -#include "string" -#include - -class TorchModel -{ - public: - TorchModel(); - ~TorchModel(); - bool loadModel(const std::string &modelPath); - torch::Tensor predict(const std::vector &input); - void changeDevice(const torch::DeviceType &deviceSet, const int &index); - torch::Tensor convertToTensor(const cv::Mat& img, bool normalize=false, bool color=true); - torch::Tensor convertListToTensor(std::list& imgs); - torch::Tensor predictTuple(const std::vector& input); - cv::Mat convertToMat(const torch::Tensor& output, bool isFloat, bool permute,bool bgr, bool color); - cv::Mat loadMat(const std::string file, bool grey, bool rgb); - torch::jit::script::Module model; - //Default device is CUDA, if avail - torch::Device device = torch::kCUDA; - }; -#endif diff --git a/lang/latin_char.txt b/lang/latin_char.txt new file mode 100644 index 
0000000..1bd9ec6 --- /dev/null +++ b/lang/latin_char.txt @@ -0,0 +1 @@ + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~eÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ diff --git a/models/CRAFT-detector.bin b/models/CRAFT-detector.bin new file mode 100644 index 0000000..087317d Binary files /dev/null and b/models/CRAFT-detector.bin differ diff --git a/models/CRAFT-detector.pt b/models/CRAFT-detector.pt deleted file mode 100644 index d2af1a2..0000000 Binary files a/models/CRAFT-detector.pt and /dev/null differ diff --git a/models/CRAFT-detector.xml b/models/CRAFT-detector.xml new file mode 100644 index 0000000..ac3cc12 --- /dev/null +++ b/models/CRAFT-detector.xml @@ -0,0 +1,4357 @@ + + + + + + + + -1 + 3 + -1 + -1 + + + + + + + + 64 + 3 + 3 + 3 + + + + + + + + + + + 64 + 3 + 3 + 3 + + + + + 64 + 3 + 3 + 3 + + + + + + + + -1 + 3 + -1 + -1 + + + 64 + 3 + 3 + 3 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + -1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + + + + 64 + 64 + 3 + 3 + + + + + 64 + 64 + 3 + 3 + + + + + + + + -1 + 64 + -1 + -1 + + + 64 + 64 + 3 + 3 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + -1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + -1 + 64 + -1 + -1 + + + + + + + + 128 + 64 + 3 + 3 + + + + + + + + + + + 128 + 64 + 3 + 3 + + + + + 128 + 64 + 3 + 3 + + + + + + + + -1 + 64 + -1 + -1 + + + 128 + 64 + 3 + 3 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 
+ 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + + + + 128 + 128 + 3 + 3 + + + + + 128 + 128 + 3 + 3 + + + + + + + + -1 + 128 + -1 + -1 + + + 128 + 128 + 3 + 3 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + -1 + 128 + -1 + -1 + + + + + + + + 256 + 128 + 3 + 3 + + + + + + + + + + + 256 + 128 + 3 + 3 + + + + + 256 + 128 + 3 + 3 + + + + + + + + -1 + 128 + -1 + -1 + + + 256 + 128 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + + + + 256 + 256 + 3 + 3 + + + + + 256 + 256 + 3 + 3 + + + + + + + + -1 + 256 + -1 + -1 + + + 256 + 256 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + + + + 256 + 256 + 3 + 3 + + + + + 256 + 256 + 3 + 3 + + + + + + + + -1 + 256 + -1 + -1 + + + 256 + 256 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + -1 + 256 + -1 + -1 + + + + + 
-1 + 256 + -1 + -1 + + + -1 + 256 + -1 + -1 + + + + + + + + 512 + 256 + 3 + 3 + + + + + + + + + + + 512 + 256 + 3 + 3 + + + + + 512 + 256 + 3 + 3 + + + + + + + + -1 + 256 + -1 + -1 + + + 512 + 256 + 3 + 3 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 512 + 512 + 3 + 3 + + + + + + + + + + + 512 + 512 + 3 + 3 + + + + + 512 + 512 + 3 + 3 + + + + + + + + -1 + 512 + -1 + -1 + + + 512 + 512 + 3 + 3 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 512 + 512 + 3 + 3 + + + + + + + + + + + 512 + 512 + 3 + 3 + + + + + 512 + 512 + 3 + 3 + + + + + + + + -1 + 512 + -1 + -1 + + + 512 + 512 + 3 + 3 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + -1 + 512 + -1 + -1 + + + + + + + + 512 + 512 + 3 + 3 + + + + + + + + + + + 512 + 512 + 3 + 3 + + + + + 512 + 512 + 3 + 3 + + + + + + + + -1 + 512 + -1 + -1 + + + 512 + 512 + 3 + 3 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 512 + 512 + 3 + 3 + + + + + + + + + + + 512 + 512 + 3 + 3 + + + + + 512 + 512 + 3 + 3 + + + + + + + + -1 + 512 + 
-1 + -1 + + + 512 + 512 + 3 + 3 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + -1 + 512 + -1 + -1 + + + + + + + + 1024 + 512 + 3 + 3 + + + + + + + + + + + 1024 + 512 + 3 + 3 + + + + + 1024 + 512 + 3 + 3 + + + + + + + + -1 + 512 + -1 + -1 + + + 1024 + 512 + 3 + 3 + + + + + -1 + 1024 + -1 + -1 + + + + + + + + 1 + 1024 + 1 + 1 + + + + + + + + + + + 1 + 1024 + 1 + 1 + + + + + 1 + 1024 + 1 + 1 + + + + + + + + -1 + 1024 + -1 + -1 + + + 1 + 1024 + 1 + 1 + + + + + -1 + 1024 + -1 + -1 + + + + + + + + 1024 + 1024 + 1 + 1 + + + + + + + + + + + 1024 + 1024 + 1 + 1 + + + + + 1024 + 1024 + 1 + 1 + + + + + + + + -1 + 1024 + -1 + -1 + + + 1024 + 1024 + 1 + 1 + + + + + -1 + 1024 + -1 + -1 + + + + + + + + 1 + 1024 + 1 + 1 + + + + + + + + + + + 1 + 1024 + 1 + 1 + + + + + 1 + 1024 + 1 + 1 + + + + + + + + -1 + 1024 + -1 + -1 + + + 1 + 1024 + 1 + 1 + + + + + -1 + 1024 + -1 + -1 + + + + + + + + -1 + 1024 + -1 + -1 + + + -1 + 512 + -1 + -1 + + + + + -1 + 1536 + -1 + -1 + + + + + + + + 512 + 1536 + 1 + 1 + + + + + + + + + + + 512 + 1536 + 1 + 1 + + + + + 512 + 1536 + 1 + 1 + + + + + + + + -1 + 1536 + -1 + -1 + + + 512 + 1536 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 1 + 512 + 1 + 1 + + + + + + + + + + + 1 + 512 + 1 + 1 + + + + + 1 + 512 + 1 + 1 + + + + + + + + -1 + 512 + -1 + -1 + + + 1 + 512 + 1 + 1 + + + + + -1 + 512 + -1 + -1 + + + + + + + -1 + 512 + -1 + -1 + + + + + -1 + 512 + -1 + -1 + + + + + + + + 256 + 512 + 3 + 3 + + + + + + + + + + + 256 + 512 + 3 + 3 + + + + + 256 + 512 + 3 + 3 + + + + + + + + -1 + 512 + -1 + -1 + + + 256 + 512 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 
+ -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + -1 + 512 + -1 + -1 + + + + + 4 + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + 4 + + + 2 + + + + + + 2 + + + + + + + + 2 + + + + + 2 + + + + + + + + 2 + + + + + + + + -1 + 256 + -1 + -1 + + + 2 + + + 2 + + + + + -1 + 256 + -1 + -1 + + + + + + + + -1 + 256 + -1 + -1 + + + -1 + 512 + -1 + -1 + + + + + -1 + 768 + -1 + -1 + + + + + + + + 256 + 768 + 1 + 1 + + + + + + + + + + + 256 + 768 + 1 + 1 + + + + + 256 + 768 + 1 + 1 + + + + + + + + -1 + 768 + -1 + -1 + + + 256 + 768 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 128 + 256 + 3 + 3 + + + + + + + + + + + 128 + 256 + 3 + 3 + + + + + 128 + 256 + 3 + 3 + + + + + + + + -1 + 256 + -1 + -1 + + + 128 + 256 + 3 + 3 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + -1 + 256 + -1 + -1 + + + + + 4 + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + 4 + + + 2 + + + + + + 2 + + + + + + + + 2 + + + + + 2 + + + + + + + + 2 + + + + + + + + -1 + 128 + -1 + -1 + + + 2 + + + 2 + + + + + -1 + 128 + -1 + -1 + + + + + + + + -1 + 128 + -1 + -1 + + + -1 + 256 + -1 + -1 + + + + + -1 + 384 + -1 + -1 + + + + + + + + 128 + 384 + 1 + 1 + + + + + + + + + + + 128 + 384 + 1 + 1 + + + + + 128 + 384 + 1 + 1 + + + + + + + + -1 + 384 + -1 + -1 + + + 128 + 384 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 
+ + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 64 + 128 + 3 + 3 + + + + + + + + + + + 64 + 128 + 3 + 3 + + + + + 64 + 128 + 3 + 3 + + + + + + + + -1 + 128 + -1 + -1 + + + 64 + 128 + 3 + 3 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + -1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + -1 + 128 + -1 + -1 + + + + + 4 + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + + 4 + + + 2 + + + + + + 2 + + + + + + + + 2 + + + + + 2 + + + + + + + + 2 + + + + + + + + -1 + 64 + -1 + -1 + + + 2 + + + 2 + + + + + -1 + 64 + -1 + -1 + + + + + + + + -1 + 64 + -1 + -1 + + + -1 + 128 + -1 + -1 + + + + + -1 + 192 + -1 + -1 + + + + + + + + 64 + 192 + 1 + 1 + + + + + + + + + + + 64 + 192 + 1 + 1 + + + + + 64 + 192 + 1 + 1 + + + + + + + + -1 + 192 + -1 + -1 + + + 64 + 192 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + -1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 32 + 64 + 3 + 3 + + + + + + + + + + + 32 + 64 + 3 + 3 + + + + + 32 + 64 + 3 + 3 + + + + + + + + -1 + 64 + -1 + -1 + + + 32 + 64 + 3 + 3 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + 1 + 1 + + + + + + + + -1 + 32 + -1 + -1 + + + 1 + 32 + 1 + 1 + + + + + -1 + 32 + -1 + -1 + + + + + + + -1 + 32 + -1 + -1 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + + + + 32 + 32 + 3 + 3 + + + + + 32 + 32 + 3 + 3 + + + + + + + + -1 + 32 + -1 + -1 + + + 32 + 32 + 3 + 3 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + 1 + 1 + + + + + + + + -1 + 32 + -1 + -1 + + + 1 + 32 + 1 + 1 + + + + 
+ -1 + 32 + -1 + -1 + + + + + + + -1 + 32 + -1 + -1 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + + + + 32 + 32 + 3 + 3 + + + + + 32 + 32 + 3 + 3 + + + + + + + + -1 + 32 + -1 + -1 + + + 32 + 32 + 3 + 3 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + 1 + 1 + + + + + + + + -1 + 32 + -1 + -1 + + + 1 + 32 + 1 + 1 + + + + + -1 + 32 + -1 + -1 + + + + + + + -1 + 32 + -1 + -1 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 16 + 32 + 3 + 3 + + + + + + + + + + + 16 + 32 + 3 + 3 + + + + + 16 + 32 + 3 + 3 + + + + + + + + -1 + 32 + -1 + -1 + + + 16 + 32 + 3 + 3 + + + + + -1 + 16 + -1 + -1 + + + + + + + + 1 + 16 + 1 + 1 + + + + + + + + + + + 1 + 16 + 1 + 1 + + + + + 1 + 16 + 1 + 1 + + + + + + + + -1 + 16 + -1 + -1 + + + 1 + 16 + 1 + 1 + + + + + -1 + 16 + -1 + -1 + + + + + + + -1 + 16 + -1 + -1 + + + + + -1 + 16 + -1 + -1 + + + + + + + + 16 + 16 + 1 + 1 + + + + + + + + + + + 16 + 16 + 1 + 1 + + + + + 16 + 16 + 1 + 1 + + + + + + + + -1 + 16 + -1 + -1 + + + 16 + 16 + 1 + 1 + + + + + -1 + 16 + -1 + -1 + + + + + + + + 1 + 16 + 1 + 1 + + + + + + + + + + + 1 + 16 + 1 + 1 + + + + + 1 + 16 + 1 + 1 + + + + + + + + -1 + 16 + -1 + -1 + + + 1 + 16 + 1 + 1 + + + + + -1 + 16 + -1 + -1 + + + + + + + -1 + 16 + -1 + -1 + + + + + -1 + 16 + -1 + -1 + + + + + + + + 2 + 16 + 1 + 1 + + + + + + + + + + + 2 + 16 + 1 + 1 + + + + + 2 + 16 + 1 + 1 + + + + + + + + -1 + 16 + -1 + -1 + + + 2 + 16 + 1 + 1 + + + + + -1 + 2 + -1 + -1 + + + + + + + + 1 + 2 + 1 + 1 + + + + + + + + + + + 1 + 2 + 1 + 1 + + + + + 1 + 2 + 1 + 1 + + + + + + + + -1 + 2 + -1 + -1 + + + 1 + 2 + 1 + 1 + + + + + -1 + 2 + -1 + -1 + + + + + + + + 4 + + + + + + + -1 + 2 + -1 + -1 + + + 4 + + + + + -1 + -1 + -1 + 2 + + + + + + + -1 + -1 + -1 + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/models/recognition_model.bin b/models/recognition_model.bin new file mode 100644 index 0000000..2a0ca0f Binary files /dev/null and b/models/recognition_model.bin differ diff --git a/models/recognition_model.xml b/models/recognition_model.xml new file mode 100644 index 0000000..fd64255 --- /dev/null +++ b/models/recognition_model.xml @@ -0,0 +1,2311 @@ + + + + + + + + 1 + 1 + -1 + -1 + + + + + + + + 1 + 1 + 97 + + + + + + + + + + + 1 + 1 + 97 + + + + + 1 + 1 + 97 + + + + + + + + 1 + 1 + 256 + + + + + + + + + + + 1 + 1 + 256 + + + + + 1 + 1 + 256 + + + + + + + + 1 + 1 + 256 + + + + + + + + + + + 1 + 1 + 256 + + + + + 1 + 1 + 256 + + + + + + + + 32 + 1 + 3 + 3 + + + + + + + + + + + 32 + 1 + 3 + 3 + + + + + 32 + 1 + 3 + 3 + + + + + + + + 1 + 1 + -1 + -1 + + + 32 + 1 + 3 + 3 + + + + + 1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + 32 + -1 + -1 + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + -1 + -1 + + + + + + + 1 + 32 + -1 + -1 + + + + + 1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + -1 + -1 + + + + + 1 + 32 + -1 + -1 + + + 1 + 32 + -1 + -1 + + + + + + + + 64 + 32 + 3 + 3 + + + + + + + + + + + 64 + 32 + 3 + 3 + + + + + 64 + 32 + 3 + 3 + + + + + + + + 1 + 32 + -1 + -1 + + + 64 + 32 + 3 + 3 + + + + + 1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + -1 + -1 + + + + + + + 1 + 64 + -1 + -1 + + + + + 1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + -1 + -1 + + + + + 1 + 64 + -1 + -1 + + + 1 + 64 + -1 + -1 + + + + + + + + 128 + 64 + 3 + 3 + + + + + + + + + + + 128 + 64 + 3 + 3 + + + + + 128 + 64 + 3 + 3 + + + 
+ + + + + 1 + 64 + -1 + -1 + + + 128 + 64 + 3 + 3 + + + + + 1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + -1 + -1 + + + + + + + 1 + 128 + -1 + -1 + + + + + 1 + 128 + -1 + -1 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + + + + 128 + 128 + 3 + 3 + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + -1 + -1 + + + 128 + 128 + 3 + 3 + + + + + 1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + -1 + -1 + + + + + + + 1 + 128 + -1 + -1 + + + + + 1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + -1 + -1 + + + + + 1 + 128 + -1 + -1 + + + 1 + 128 + -1 + -1 + + + + + + + + 256 + 128 + 3 + 3 + + + + + + + + + + + 256 + 128 + 3 + 3 + + + + + 256 + 128 + 3 + 3 + + + + + + + + 1 + 128 + -1 + -1 + + + 256 + 128 + 3 + 3 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + -1 + -1 + + + + + + + 1 + 256 + -1 + -1 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + + + + 256 + 256 + 3 + 3 + + + + + 256 + 256 + 3 + 3 + + + + + + + + 1 + 256 + -1 + -1 + + + 256 + 256 + 3 + 3 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + -1 + -1 + + + + + + + 1 + 256 + -1 + -1 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + -1 + -1 + + + + + 1 + 256 + -1 + -1 + + + 1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 2 + 2 + + + + + + + + + + + 256 + 256 + 2 + 2 + + + + + 256 + 256 + 2 + 2 + + + + + + + + 1 + 256 + -1 + -1 + + + 256 + 256 + 2 + 2 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + 
+ + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + -1 + -1 + + + + + + + 1 + 256 + -1 + -1 + + + + + 1 + 256 + -1 + -1 + + + + + + + + 4 + + + + + + + 1 + 256 + -1 + -1 + + + 4 + + + + + 1 + -1 + 256 + -1 + + + + + + + + 1 + -1 + 256 + -1 + + + + + 1 + -1 + 256 + -1 + + + + + + + + 1 + + + + + + + 1 + -1 + 256 + -1 + + + 1 + + + + + 1 + -1 + 256 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + 1 + -1 + 256 + + + + + 3 + + + + + + + + + + + 1 + + + + + + + + + + + + + + + + + 3 + + + 1 + + + + + + 1 + + + + + + + + + + + 1 + + + + + + + + 1 + + + 1 + + + 1 + + + + + 3 + + + + + + + + + 3 + + + + + 2 + 1 + 256 + + + + + + + + 3 + + + + + + + 2 + 1 + 256 + + + 3 + + + + + 1 + 2 + 256 + + + + + + + + 1 + + + + + + + + 1 + + + + + + + + 3 + + + 1 + + + 1 + + + + + 1 + + + + + + + + 1 + + + 1 + + + + + 1 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + + + + + + + + + + + 2 + 1024 + + + + + 2 + 1024 + + + + + + + + 1 + -1 + 256 + + + 1 + 2 + 256 + + + 1 + 2 + 256 + + + 1 + + + 2 + 1024 + 256 + + + 2 + 1024 + 256 + + + 2 + 1024 + + + + + 1 + 2 + -1 + 256 + + + 1 + 2 + 256 + + + 1 + 2 + 256 + + + + + + + + 4 + + + + + + + 1 + 2 + -1 + 256 + + + 4 + + + + + -1 + 1 + 2 + 256 + + + + + + + + + + + 3 + + + + + + + + -1 + 1 + 2 + 256 + + + 3 + + + + + -1 + 1 + 512 + + + + + + + + 3 + + + + + + + -1 + 1 + 512 + + + 3 + + + + + 1 + -1 + 512 + + + + + + + + 256 + 512 + + + + + + + + + + + 256 + 512 + + + + + 256 + 512 + + + + + + + + 1 + -1 + 512 + + + 256 + 512 + + + + + 1 + -1 + 256 + + + + + + + + 1 + 1 + 256 + + + 1 + -1 + 256 + + + + + 1 + -1 + 256 + + + + + + + + 1 + -1 + 256 + + + + + 3 + + + + + + + + 1 + + + + + + + + 1 + + + + + + + + 3 + + + 1 + + + 1 + + + + + 1 + + + + + + + + 1 + + + 1 + + + + + 1 
+ + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + + + + + + + + + + + 2 + 1024 + + + + + 2 + 1024 + + + + + + + + 1 + -1 + 256 + + + 1 + 2 + 256 + + + 1 + 2 + 256 + + + 1 + + + 2 + 1024 + 256 + + + 2 + 1024 + 256 + + + 2 + 1024 + + + + + 1 + 2 + -1 + 256 + + + 1 + 2 + 256 + + + 1 + 2 + 256 + + + + + + + + 4 + + + + + + + 1 + 2 + -1 + 256 + + + 4 + + + + + -1 + 1 + 2 + 256 + + + + + + + + + + + 3 + + + + + + + + -1 + 1 + 2 + 256 + + + 3 + + + + + -1 + 1 + 512 + + + + + + + + 3 + + + + + + + -1 + 1 + 512 + + + 3 + + + + + 1 + -1 + 512 + + + + + + + + 256 + 512 + + + + + + + + + + + 256 + 512 + + + + + 256 + 512 + + + + + + + + 1 + -1 + 512 + + + 256 + 512 + + + + + 1 + -1 + 256 + + + + + + + + 1 + 1 + 256 + + + 1 + -1 + 256 + + + + + 1 + -1 + 256 + + + + + + + + 97 + 256 + + + + + + + + + + + 97 + 256 + + + + + 97 + 256 + + + + + + + + 1 + -1 + 256 + + + 97 + 256 + + + + + 1 + -1 + 97 + + + + + + + + 1 + 1 + 97 + + + 1 + -1 + 97 + + + + + 1 + -1 + 97 + + + + + + + 1 + -1 + 97 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/models/recognition_model_latin.bin b/models/recognition_model_latin.bin new file mode 100644 index 0000000..e123f59 Binary files /dev/null and b/models/recognition_model_latin.bin differ diff --git a/models/recognition_model_latin.xml b/models/recognition_model_latin.xml new file mode 100644 index 0000000..dc8bdea --- /dev/null +++ b/models/recognition_model_latin.xml @@ -0,0 +1,2335 @@ + + + + + + + + -1 + -1 + -1 + -1 + + + + + + + + -1 + -1 + + + + + + + + 32 + 1 + 3 + 3 + + + + + + + + + + + 32 + 1 + 3 + 3 + + + + + 32 + 1 + 3 + 3 + 
+ + + + + + + -1 + -1 + -1 + -1 + + + 32 + 1 + 3 + 3 + + + + + -1 + 32 + -1 + -1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + + + + 1 + 32 + 1 + 1 + + + + + 1 + 32 + 1 + 1 + + + + + + + + -1 + 32 + -1 + -1 + + + 1 + 32 + 1 + 1 + + + + + -1 + 32 + -1 + -1 + + + + + + + -1 + 32 + -1 + -1 + + + + + -1 + 32 + -1 + -1 + + + + + + + + -1 + 32 + -1 + -1 + + + + + -1 + 32 + -1 + -1 + + + -1 + 32 + -1 + -1 + + + + + + + + 64 + 32 + 3 + 3 + + + + + + + + + + + 64 + 32 + 3 + 3 + + + + + 64 + 32 + 3 + 3 + + + + + + + + -1 + 32 + -1 + -1 + + + 64 + 32 + 3 + 3 + + + + + -1 + 64 + -1 + -1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + + + + 1 + 64 + 1 + 1 + + + + + 1 + 64 + 1 + 1 + + + + + + + + -1 + 64 + -1 + -1 + + + 1 + 64 + 1 + 1 + + + + + -1 + 64 + -1 + -1 + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + + + + + + -1 + 64 + -1 + -1 + + + + + -1 + 64 + -1 + -1 + + + -1 + 64 + -1 + -1 + + + + + + + + 128 + 64 + 3 + 3 + + + + + + + + + + + 128 + 64 + 3 + 3 + + + + + 128 + 64 + 3 + 3 + + + + + + + + -1 + 64 + -1 + -1 + + + 128 + 64 + 3 + 3 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + + + + 128 + 128 + 3 + 3 + + + + + 128 + 128 + 3 + 3 + + + + + + + + -1 + 128 + -1 + -1 + + + 128 + 128 + 3 + 3 + + + + + -1 + 128 + -1 + -1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + + + + 1 + 128 + 1 + 1 + + + + + 1 + 128 + 1 + 1 + + + + + + + + -1 + 128 + -1 + -1 + + + 1 + 128 + 1 + 1 + + + + + -1 + 128 + -1 + -1 + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + + + + + + -1 + 128 + -1 + -1 + + + + + -1 + 128 + -1 + -1 + + + -1 + 128 + -1 + -1 + + + + + + + + 256 + 128 + 3 + 3 + + + + + + + + + + + 256 + 128 + 3 + 3 + + + + + 256 + 128 + 3 + 3 + + + + + + + + -1 + 128 + -1 + -1 + + + 
256 + 128 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + + + + 256 + 256 + 3 + 3 + + + + + 256 + 256 + 3 + 3 + + + + + + + + -1 + 256 + -1 + -1 + + + 256 + 256 + 3 + 3 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + -1 + 256 + -1 + -1 + + + + + + + + 256 + 256 + 2 + 2 + + + + + + + + + + + 256 + 256 + 2 + 2 + + + + + 256 + 256 + 2 + 2 + + + + + + + + -1 + 256 + -1 + -1 + + + 256 + 256 + 2 + 2 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + + + + 1 + 256 + 1 + 1 + + + + + 1 + 256 + 1 + 1 + + + + + + + + -1 + 256 + -1 + -1 + + + 1 + 256 + 1 + 1 + + + + + -1 + 256 + -1 + -1 + + + + + + + -1 + 256 + -1 + -1 + + + + + -1 + 256 + -1 + -1 + + + + + + + + 4 + + + + + + + -1 + 256 + -1 + -1 + + + 4 + + + + + -1 + -1 + 256 + -1 + + + + + + + + 2 + + + + + + + -1 + -1 + 256 + -1 + + + 2 + + + + + -1 + -1 + 256 + 1 + + + + + + + + + + + 4 + + + + + + + + -1 + -1 + 256 + 1 + + + 4 + + + + + -1 + -1 + 256 + 1 + + + + + + + + + + + + + -1 + -1 + 256 + 1 + + + + + + -1 + -1 + 256 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + -1 + -1 + 256 + -1 + + + + + 4 + + + + + + + + + + + 1 + + + + + + + + + + + + + + + + + 4 + + + 1 + + + + + + 1 + + + + + + + + + + + 1 + + + + + + + + 1 + + + 1 + + + 1 + + + + + 3 + + + + + + + + + 3 + + + + + 2 + -1 + 256 + + + + + + + + 3 + + + + + + + 2 + -1 + 256 + + + 3 + + + + + -1 + 2 + 256 + + + + + + + + -1 + -1 + 256 + + 
+ + + 3 + + + + + + + + 1 + + + + + + + + 1 + + + + + + + + 3 + + + 1 + + + 1 + + + + + 1 + + + + + + + + 1 + + + + + 1 + + + + + + + + 1 + + + 1 + + + + + -1 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + + + + + + + + + + + 2 + 1024 + + + + + 2 + 1024 + + + + + + + + -1 + -1 + 256 + + + -1 + 2 + 256 + + + -1 + 2 + 256 + + + -1 + + + 2 + 1024 + 256 + + + 2 + 1024 + 256 + + + 2 + 1024 + + + + + -1 + 2 + -1 + 256 + + + -1 + 2 + 256 + + + -1 + 2 + 256 + + + + + + + + 4 + + + + + + + -1 + 2 + -1 + 256 + + + 4 + + + + + -1 + -1 + 2 + 256 + + + + + + + + + + + 3 + + + + + + + + -1 + -1 + 2 + 256 + + + 3 + + + + + -1 + -1 + 512 + + + + + + + + 256 + 512 + + + + + + + + + + + 256 + 512 + + + + + 256 + 512 + + + + + + + + -1 + -1 + 512 + + + 256 + 512 + + + + + -1 + -1 + 256 + + + + + + + + 1 + 1 + 256 + + + + + + + + + + + 1 + 1 + 256 + + + + + 1 + 1 + 256 + + + + + + + + -1 + -1 + 256 + + + 1 + 1 + 256 + + + + + -1 + -1 + 256 + + + + + + + + -1 + -1 + 256 + + + + + 3 + + + + + + + + 1 + + + + + + + + 1 + + + + + + + + 3 + + + 1 + + + 1 + + + + + 1 + + + + + + + + 1 + + + 1 + + + + + -1 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + 256 + + + + + + + + + + + 2 + 1024 + 256 + + + + + 2 + 1024 + 256 + + + + + + + + 2 + 1024 + + + + + + + + + + + 2 + 1024 + + + + + 2 + 1024 + + + + + + + + -1 + -1 + 256 + + + -1 + 2 + 256 + + + -1 + 2 + 256 + + + -1 + + + 2 + 1024 + 256 + + + 2 + 1024 + 256 + + + 2 + 1024 + + + + + -1 + 2 + -1 + 256 + + + -1 + 2 + 256 + + + -1 + 2 + 256 + + + + + + + + 4 + + + + + + + -1 + 2 + -1 + 256 + + + 4 + + + + + -1 + -1 + 2 + 256 + + + + + + + + + + + 3 + + + + + + + + -1 + -1 + 2 + 256 + + + 3 + + + + + -1 + -1 + 512 + + + + + + + + 256 + 512 + + + + + + + + + + + 256 + 512 + + + + + 256 + 512 + + + + + + + + -1 + 
-1 + 512 + + + 256 + 512 + + + + + -1 + -1 + 256 + + + + + + + + 1 + 1 + 256 + + + + + + + + + + + 1 + 1 + 256 + + + + + 1 + 1 + 256 + + + + + + + + -1 + -1 + 256 + + + 1 + 1 + 256 + + + + + -1 + -1 + 256 + + + + + + + + 352 + 256 + + + + + + + + + + + 352 + 256 + + + + + 352 + 256 + + + + + + + + -1 + -1 + 256 + + + 352 + 256 + + + + + -1 + -1 + 352 + + + + + + + + 1 + 1 + 352 + + + + + + + + + + + 1 + 1 + 352 + + + + + 1 + 1 + 352 + + + + + + + + -1 + -1 + 352 + + + 1 + 1 + 352 + + + + + -1 + -1 + 352 + + + + + + + -1 + -1 + 352 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/models/traced-recog.pt b/models/traced-recog.pt deleted file mode 100644 index 79e66f6..0000000 Binary files a/models/traced-recog.pt and /dev/null differ diff --git a/output-heatmap.jpg b/output-heatmap.jpg deleted file mode 100644 index 2039e41..0000000 Binary files a/output-heatmap.jpg and /dev/null differ diff --git a/src/CRAFT.cpp b/src/CRAFT.cpp index 661d086..ac82f66 100644 --- a/src/CRAFT.cpp +++ b/src/CRAFT.cpp @@ -1,8 +1,9 @@ #include "CRAFT.h" #include +#include +#include -using namespace torch::indexing; -HeatMapRatio CraftModel::resizeAspect(cv::Mat& img) +HeatMapRatio CraftModel::resizeAspect(cv::Mat& img) { HeatMapRatio output; try { @@ -51,7 +52,7 @@ HeatMapRatio CraftModel::resizeAspect(cv::Mat& img) return output; } std::vector CraftModel::mergeBoundingBoxes(std::vector& dets, float distanceThresh, int height, int width) -{ +{ // represents how much we change the top left Y std::sort(dets.begin(), dets.end(), boxSorter()); bool merge = NULL; @@ -70,7 +71,7 @@ std::vector CraftModel::mergeBoundingBoxes(std::vector float ratio = x / xPrime; bool isNegative = false; if (x - xPrime < 0) isNegative = true; - float w= 
dets[i].bottomRight.x - dets[i].topLeft.x; + float w = dets[i].bottomRight.x - dets[i].topLeft.x; float h = dets[i].bottomRight.y - dets[i].topLeft.y; if (width > 5 * height) { @@ -79,7 +80,7 @@ std::vector CraftModel::mergeBoundingBoxes(std::vector continue; } //merge box, store point - if (ratio > distanceThresh && ratio < 1.4 && std::abs(dets[i].bottomRight.y - dets[i+1].bottomRight.y) < 20) + if (ratio > distanceThresh && ratio < 1.4 && std::abs(dets[i].bottomRight.y - dets[i + 1].bottomRight.y) < 20) { newBottomRight = dets[i + 1].bottomRight; if (dets[i + 1].topLeft.y < dets[i].topLeft.y) @@ -128,7 +129,7 @@ std::vector CraftModel::mergeBoundingBoxes(std::vector newBox.topLeft.x = 0; } newBox.topLeft.x = int(newBox.topLeft.x); - newBox.topLeft.y = int(newBox.topLeft.y); + newBox.topLeft.y = int(newBox.topLeft.y); newBox.bottomRight = newBottomRight; newBox.bottomRight.y += maxY; @@ -138,15 +139,15 @@ std::vector CraftModel::mergeBoundingBoxes(std::vector if (newBox.bottomRight.y > height) { - newBox.bottomRight.y = height-1; + newBox.bottomRight.y = height - 1; } if (newBox.bottomRight.x > width) { - newBox.bottomRight.x = width-5; + newBox.bottomRight.x = width - 5; } newBox.bottomRight.x = int(newBox.bottomRight.x); - newBox.bottomRight.y = int(newBox.bottomRight.y); + newBox.bottomRight.y = int(newBox.bottomRight.y); merged.push_back(newBox); // move top left box to next box newTopLeft = i + 1; @@ -158,12 +159,33 @@ std::vector CraftModel::mergeBoundingBoxes(std::vector return merged; } -std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input, const torch::Tensor& output, float textThresh, float linkThresh, float lowText) +std::vector CraftModel::getBoundingBoxes(const ov::Tensor& input, const ov::Tensor& output, float textThresh, float linkThresh, float lowText) { std::vector detBoxes; - cv::Mat linkMap = this->convertToMat(output.select(2, 0).unsqueeze(0).clone(),true, true, false, false).clone(); - cv::Mat textMap = 
this->convertToMat(output.select(2, 1).unsqueeze(0).clone(), true, true, false, false).clone(); - auto tempTextMap = output.select(2, 1).unsqueeze(0).clone(); + const ov::Shape output_shape = output.get_shape(); + auto output_size = output.get_size(); + float* output_data = output.data(); + std::vector linkMapData = {}; + std::vector textMapData = {}; + for (size_t i = 0; i < output_size; i+=2) + { + float scaled_value = output_data[i] * 255.0f; + float cla = std::max(0.0f, std::min(255.0f, scaled_value)); + uint8_t casted = static_cast(cla); + linkMapData.push_back(casted); + } + + for (size_t i = 1; i < output_size; i += 2) + { + float scaled_value = output_data[i] * 255.0f; + float cla = std::max(0.0f, std::min(255.0f, scaled_value)); + textMapData.push_back(cla); + } + const ov::Tensor linkMapTensor(ov::element::u8, { 1, output_shape[1],output_shape[2] }, linkMapData.data()); + const ov::Tensor textMapTensor(ov::element::u8, { 1, output_shape[1],output_shape[2] }, textMapData.data()); + const cv::Mat linkMap = convertToMat(linkMapTensor, true, true, false, false).clone(); + const cv::Mat textMap = convertToMat(textMapTensor, true, true, false, false).clone(); + auto tempTextMap = textMap.clone(); int r = linkMap.rows; int c = linkMap.cols; cv::Mat linkScore, textScore; @@ -171,7 +193,7 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input cv::threshold(textMap, textScore, (lowText * 255), 255, 0); cv::Mat outputScore = linkScore.clone() + textScore.clone(); cv::min(cv::max(outputScore, 0), 255, outputScore); - outputScore.convertTo(outputScore,CV_8UC3); + outputScore.convertTo(outputScore, CV_8UC3); cv::Mat labels, stats, centroids; std::vector mapper; int numLabels = cv::connectedComponentsWithStats(outputScore, labels, stats, centroids, 4, CV_32S); @@ -184,7 +206,7 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input cv::Mat mask = (labels == i); double minVal, maxVal; cv::Point minLoc, maxLoc; - cv::minMaxLoc(textMap, 
&minVal, &maxVal, &minLoc, &maxLoc,mask); + cv::minMaxLoc(textMap, &minVal, &maxVal, &minLoc, &maxLoc, mask); mapper.push_back(i); segMap.setTo(255, labels == i); cv::Mat linkMask = (linkScore == 1) & (textScore == 0); @@ -201,7 +223,7 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input if (ex >= c) ex = c; if (ey >= r) ey = r; cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(1 + niter, 1 + niter)); - cv::dilate(segMap(cv::Range(sy, ey), cv::Range(sx, ex)), segMap(cv::Range(sy, ey), cv::Range(sx, ex)), kernel); + cv::dilate(segMap(cv::Range(sy, ey), cv::Range(sx, ex)), segMap(cv::Range(sy, ey), cv::Range(sx, ex)), kernel); // make box cv::Mat nonZeroCoords; cv::findNonZero(segMap, nonZeroCoords); @@ -213,14 +235,14 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input BoundingBox detection; std::vector points; for (int i = 0; i < 4; i++) - { + { float colVal; float rowVal = box.at(i, 0); for (int j = 0; j < 1; j++) { colVal = box.at(i, 1); } - cv::Point point(rowVal, colVal ); + cv::Point point(rowVal, colVal); points.push_back(point); } std::sort(points.begin(), points.end(), pointSorter()); @@ -246,7 +268,7 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input detection.topLeft.y = int(t) * 2; detection.bottomRight.x = int(r) * 2; detection.bottomRight.y = int(b) * 2; - + } */ detBoxes.push_back(detection); @@ -258,7 +280,7 @@ std::vector CraftModel::getBoundingBoxes(const torch::Tensor &input return detBoxes; } -cv::Mat CraftModel::normalize(const cv::Mat &img) +cv::Mat CraftModel::normalize(const cv::Mat& img) { std::vector channels(3); cv::Mat output; @@ -271,28 +293,25 @@ cv::Mat CraftModel::normalize(const cv::Mat &img) return output; } -torch::Tensor CraftModel::preProcess(const cv::Mat & matInput) +ov::Tensor CraftModel::preProcess(const cv::Mat& matInput) { //Normalize the input using mean + std values from easyOCR cv::Mat normedMatInput = this->normalize(matInput.clone()).clone(); - 
//Convert final input into a torch::Tensor from a cv::Mat - torch::Tensor input = this->convertToTensor(normedMatInput.clone()).clone(); + ov::Tensor input = this->convertToTensor(normedMatInput); return input; } -std::vector CraftModel::runDetector(torch::Tensor& input, bool merge) +std::vector CraftModel::runDetector(ov::Tensor& input, bool merge) { - int height = input.size(2); - int width = input.size(3); - std::vector detInput = { input.clone() }; - auto output = this->predict(detInput).squeeze().detach().clone(); + int height = input.get_shape()[1]; + int width = input.get_shape()[2]; + auto output = this->predict(input); auto ss = std::chrono::high_resolution_clock::now(); - auto detections = this->getBoundingBoxes(input.clone(),output.clone()); - //custom bounding box merging + auto detections = this->getBoundingBoxes(input, output); if (merge) detections = this->mergeBoundingBoxes(detections, .97, height, width); //auto ee = std::chrono::high_resolution_clock::now(); //auto difff = ee - ss; //std::cout << "TOTAL preprocessing TIME " << std::chrono::duration (difff).count() << " ms" << std::endl; return detections; -} +} \ No newline at end of file diff --git a/src/CRNN.cpp b/src/CRNN.cpp index ec1fb9e..56b22be 100644 --- a/src/CRNN.cpp +++ b/src/CRNN.cpp @@ -2,15 +2,15 @@ #include #include #include -using namespace torch::indexing; -CRNNModel::CRNNModel() : TorchModel() +CRNNModel::CRNNModel() : OpenVINOModel() { // eventually read from a config! 
std::string filename = "english_g2_characters.txt"; std::ifstream file(filename); - if (!file.is_open()) { + if (!file.is_open()) + { std::cerr << "Error: Unable to open file " << filename << std::endl; } @@ -32,45 +32,36 @@ float resizeComputeRatio(cv::Mat& img, int modelHeight) if (ratio < 1.0) { ratio = 1.0 / ratio; - cv::resize(img, img, cv::Size(modelHeight, int(modelHeight * ratio) )); + cv::resize(img, img, cv::Size(modelHeight, int(modelHeight * ratio))); } else { - - cv::resize(img, img, cv::Size(int(modelHeight * ratio),modelHeight )); - + + cv::resize(img, img, cv::Size(int(modelHeight * ratio), modelHeight)); + } - + return ratio; } -// Greedy decoding -std::string CRNNModel::greedyDecode(torch::Tensor& input, int size) + +std::string CRNNModel::greedyDecode(std::vector& encoded) { - int length = size; - std::vector ignoreList = { 0 }; - torch::Tensor t = input.slice(0, 0, input.size(0)); - auto a = torch::cat({ torch::tensor({true}), ~(t.slice(0,0, -1) == t.slice(0,1).flatten()) }, 0); - auto b = ~(t.unsqueeze(1) == torch::tensor(ignoreList).unsqueeze(0)).all(1); - auto c = a & b; - auto indices = c.nonzero(); - auto result = t.index_select(0, indices.flatten()); - std::vector extracted; - for (int i = 0; i < result.size(0); i++) { - int index = result[i].item(); - if (index >= 0 && index < this->characters.size()) { - extracted.push_back(this->characters[index]); + std::string text=""; + for (int i = 0; i < encoded.size(); i++) + { + if (encoded[i]!= 0 && encoded[i] != encoded[i + 1]) + { + text.push_back(this->characters[encoded[i]]); + } } - // Join the extracted characters into a single string - std::string text(extracted.begin(), extracted.end()); return text; } -//still need to implement beam search -torch::Tensor CRNNModel::preProcess(cv::Mat& det) +ov::Tensor CRNNModel::preProcess(cv::Mat& det) { // Default model height used in easyOCR float ratio = resizeComputeRatio(det, 64); @@ -80,15 +71,14 @@ torch::Tensor CRNNModel::preProcess(cv::Mat& 
det) //det.convertTo(det, -1, alpha, beta); //at least 128 in length - auto processedTensor = this->normalizePad(det, 256); + auto processedTensor = this->normalize(det); return processedTensor; } -std::vector CRNNModel::recognize(std::vector& dets, cv::Mat& img, int &maxWidth) +std::vector CRNNModel::recognize(std::vector& dets, cv::Mat& img) { // returns max width for padding and resize - std::vector processed; - float maxRatio = 0; + std::vector processed; std::vector results; for (auto& x : dets) { @@ -96,48 +86,105 @@ std::vector CRNNModel::recognize(std::vector& dets, cv: cv::Mat det = img(cv::Rect(x.topLeft.x, x.topLeft.y, (x.bottomRight.x - x.topLeft.x), (x.bottomRight.y - x.topLeft.y))).clone(); if (det.rows < 5) continue; - - torch::Tensor processedTensor = this->preProcess(det); - std::vector input{ processedTensor.unsqueeze(0) }; - auto ss = std::chrono::high_resolution_clock::now(); - torch::Tensor output = this->predict(input); - auto ee = std::chrono::high_resolution_clock::now(); + ov::Tensor processedTensor = this->preProcess(det); + //auto ss = std::chrono::high_resolution_clock::now(); + ov::Tensor output = this->predict(processedTensor); + /*auto ee = std::chrono::high_resolution_clock::now(); auto difff = ee - ss; + */ //std::cout << "TOTAL INFERENCE RECORNGITON TIME " << std::chrono::duration (difff).count() << " ms" << std::endl; - + //post process and decode - auto confidence = torch::softmax(output, 2); - auto norm = confidence.sum(2); - auto prob = (confidence / norm.unsqueeze(2)); - torch::Tensor predIndex; - std::tie(std::ignore, predIndex) = prob.max(2); - predIndex = predIndex.view({ -1 }); - std::string text = this->greedyDecode(predIndex, predIndex.size(0)); + auto confidence = this->softmax(output, 2); + float* confidence_data = confidence.data(); + + ov::Shape shape = confidence.get_shape(); + + std::vector maxes; + std::vector indices; + int counter = 0; + for (int i=0; i< confidence.get_shape()[1]; i++) + { + float max = 
confidence_data[counter]; + int idx = 0; + for (int j = 0; j < confidence.get_shape()[2]; j++) + { + if (confidence_data[counter] > max) + { + max = confidence_data[counter]; + idx = j; + } + counter++; + } + maxes.push_back(max); + indices.push_back(idx); + } + + std::string text = this->greedyDecode(indices); res.text = text; - res.confidence = *prob.data_ptr(); + res.confidence = *confidence.data(); res.coords = x; results.push_back(res); - processed.push_back(processedTensor); } - // 64 was model height used in easyOCR - float maxW = float(ratio * 64); return results; } -torch::Tensor CRNNModel::normalizePad(cv::Mat& processed, int maxWidth) +ov::Tensor CRNNModel::normalize(cv::Mat& processed) { - std::vector input; - auto converted = this->convertToTensor(processed.clone(), true, false).squeeze(0); - torch::Tensor pad = torch::zeros({ 1,converted.size(1),maxWidth}); - converted = (converted - (.5 ) / (.5 )); - if (maxWidth > converted.size(2)) + auto converted = this->convertToTensor(processed.clone(), true, false); + float* converted_data = converted.data(); + for (int i = 0; i < converted.get_size(); i++) { - pad.narrow(2, 0, converted.size(2)).copy_(converted.detach()); - auto padded = this->convertToMat(converted, true, true, false, false); - //cv::imwrite("Padded.jpg", padded); - converted = pad.clone(); + converted_data[i] = (converted_data[i] - (.5) / (.5)); } - int width = converted.size(2); + return converted; } +ov::Tensor CRNNModel::softmax(ov::Tensor& input, int dim) +{ + ov::Shape shape = input.get_shape(); + float* input_data = input.data(); + + ov::Tensor output(input.get_element_type(), shape); + float* output_data = output.data(); + + size_t batch_size = shape[0]; + size_t num_elements_per_axis = shape[dim]; + + for (size_t i = 0; i < input.get_size(); ++i) { + output_data[i] = std::exp(input_data[i]); + } + + for (size_t b = 0; b < batch_size; ++b) { + for (size_t j = 0; j < num_elements_per_axis; ++j) { + float sum_exp = 0.0f; + for (size_t 
k = 0; k < num_elements_per_axis; ++k) { + sum_exp += output_data[b * num_elements_per_axis + k]; + } + + for (size_t k = 0; k < num_elements_per_axis; ++k) { + output_data[b * num_elements_per_axis + k] /= sum_exp; + } + } + } + + return output; +} +void CRNNModel::print_tensor(ov::Tensor& tensor) +{ + float* data_ptr = tensor.data(); + int index = 0; + for (int i = 0; i < tensor.get_shape()[1]; i++) + { + for (int j = 0; j < tensor.get_shape()[2]; j++) + { + std::cout << data_ptr[index] << " "; + index++; + } + std::cout << std::endl; + } + + + +} \ No newline at end of file diff --git a/src/OpenvinoModel.cpp b/src/OpenvinoModel.cpp new file mode 100644 index 0000000..51f5eda --- /dev/null +++ b/src/OpenvinoModel.cpp @@ -0,0 +1,85 @@ + +#include "OpenvinoModel.h" + +OpenVINOModel::OpenVINOModel() +{ +} + +OpenVINOModel::~OpenVINOModel() +{ +} + +bool OpenVINOModel::loadModel(const std::string& modelPath, const std::string& device) +{ + bool success = false; + try + { + ov::Core core; + std::shared_ptr model = core.read_model(modelPath.c_str()); + ov::preprocess::PrePostProcessor ppp(model); + ppp.input(0).preprocess().convert_layout({ 0, 3, 1, 2 }); + model = ppp.build(); + compiled_model = core.compile_model(model, device); + success = true; + + } + catch (std::exception& e) + { + std::cout << "ERRORS"; + std::cout << e.what(); + } + return success; +} + + +ov::Tensor OpenVINOModel::predict(const ov::Tensor& input) +{ + ov::InferRequest ireq = this->compiled_model.create_infer_request(); + ireq.set_input_tensor(0, input); + ireq.infer(); + const ov::Tensor& output_tensor = ireq.get_output_tensor(0); + return output_tensor; + +} + + +ov::Tensor OpenVINOModel::convertToTensor(const cv::Mat& img, bool normalize, bool color) +{ + cv::Mat c = img.clone(); + if (color) + { + cv::cvtColor(c, c, cv::COLOR_BGR2RGB); + } + + float scale = (normalize) ? 1.0 / 255.0 : 1.0; + int channels = c.channels(); + auto colorRead = (channels == 3) ? 
CV_32FC3 : CV_32FC1; + c.convertTo(c, colorRead, scale); + + ov::Shape input_shape = { 1, size_t(c.rows), size_t(c.cols), size_t(channels) }; + ov::element::Type input_type = ov::element::f32; + ov::Tensor converted = ov::Tensor(input_type, input_shape, c.data); + ov::Tensor res = ov::Tensor(input_type, input_shape); + converted.copy_to(res); + return res; +} + +cv::Mat OpenVINOModel::loadMat(const std::string file, bool grey, bool rgb) +{ + auto readMode = (grey) ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR; + cv::Mat returnMat = cv::imread(file, readMode); + return returnMat; +} + + +cv::Mat OpenVINOModel::convertToMat(const ov::Tensor& output, bool isFloat, bool permute, bool bgr, bool color) +{ + int height = output.get_shape()[1]; + int width = output.get_shape()[2]; + int channels = output.get_shape()[0]; + auto dataType = (channels == 3) ? CV_8UC3 : CV_8UC1; + cv::Mat outputMat = cv::Mat(cv::Size(width, height), dataType, output.data()); //+channels + if (bgr) + cv::cvtColor(outputMat, outputMat, cv::COLOR_RGB2BGR); + return outputMat.clone(); +} diff --git a/src/TorchModel.cpp b/src/TorchModel.cpp deleted file mode 100644 index 24cae55..0000000 --- a/src/TorchModel.cpp +++ /dev/null @@ -1,193 +0,0 @@ - -#include "TorchModel.h" - -TorchModel::TorchModel() -{ - - if(torch::cuda::device_count() > 0) - { - torch::Device defaultDevice(torch::kCUDA,0); - this->device = defaultDevice; - } - else - { - torch::Device defaultCpu = torch::kCPU; - this->device = defaultCpu; - } - - -} - -TorchModel::~TorchModel() -{ -} - -bool TorchModel::loadModel(const std::string &modelPath) -{ - bool success = false; - try - { - - //auto startModel = chrono::steady_clock::now(); - this->model = torch::jit::load(modelPath.c_str()); - this->model.to(this->device); - //auto endModel = chrono::steady_clock::now(); - //auto diff = endModel - startModel; - //std::cout <<"MODEL TIME "<< chrono::duration (diff).count() << " ms"<model.eval(); - success = true; - - } - catch (std::exception &e) 
- { - std::cout << "ERRORS"; - std::cout << e.what(); - } - return success; -} - - -torch::Tensor TorchModel::predict(const std::vector &input) -{ - torch::Tensor result = torch::empty({ 0 }).to(this->device); - std::vector testInputs; - for(auto &x:input) - testInputs.push_back(x.to(this->device)); - - try - { - auto res = this->model.forward(testInputs).toTensor(); - return res; - - } - - catch (std::exception &e) - { - std::cout << e.what() << std::endl; - - } - - // Clears growing cuda cache and frees memory if process is interupted. - //c10::cuda::CUDACachingAllocator::emptyCache(); - return result; -} - - -torch::Tensor TorchModel::predictTuple(const std::vector& input) -{ - torch::Tensor result = torch::empty({ 0 }).to(this->device); - std::vector testInputs; - for (auto& x : input) - testInputs.push_back(x.to(this->device)); - - try - { - auto res = this->model.forward(testInputs).toTuple()->elements()[0].toTensor(); - return res; - - } - - catch (std::exception& e) - { - std::cout << e.what() << std::endl; - - } - // Clears growing cuda cache and frees memory if process is interupted. 
- //c10::cuda::CUDACachingAllocator::emptyCache(); - return result; -} - -void TorchModel::changeDevice(const torch::DeviceType &deviceSet, const int &index) -{ - int deviceCount = torch::cuda::device_count(); - //MOVE model and all tensors created from now on to desired device - if(deviceCount > 0 && deviceSet == torch::kCUDA) - { - if(index < deviceCount) - { - torch::Device dev(deviceSet,index); - this->device = dev; - this->model.to(this->device); - } - else - { - //Trying to use a device thats not there, set to next available GPU - torch::Device dev(deviceSet,deviceCount-1); - this->device = dev; - this->model.to(this->device); - } - } - else - //Set to CPU if there are no CUDA devices - { - torch::Device dev = torch::kCPU; - this->device = dev; - this->model.to(this->device); - } -} - -torch::Tensor TorchModel::convertToTensor(const cv::Mat& img, bool normalize,bool color) -{ - cv::Mat c = img.clone(); - if (color) - { - cv::cvtColor(c, c, cv::COLOR_BGR2RGB); - } - - float scale = (normalize) ? 1.0 / 255.0 : 1.0; - int channels = c.channels(); - auto colorRead = (channels == 3) ? CV_32FC3 : CV_32FC1; - c.convertTo(c,colorRead , scale); - - torch::Tensor converted = torch::zeros({ c.rows, c.cols,channels }, torch::kF32); - std::memcpy(converted.data_ptr(), c.data, sizeof(float) * converted.numel()); - - // add color dimension if it is greyscale 1 - converted = converted.permute({ 2, 0, 1 }); - - //Add batch dimension - converted = converted.unsqueeze(0).to(this->device); - - return converted; -} - -cv::Mat TorchModel::loadMat(const std::string file, bool grey, bool rgb) -{ - auto readMode = (grey) ? 
cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR; - cv::Mat returnMat = cv::imread(file, readMode); - return returnMat; -} - -torch::Tensor TorchModel::convertListToTensor(std::list& imgs) -{ - - - //Initalize tensor with first image and pop it from list - cv::Mat first = imgs.front(); - torch::Tensor converted = this->convertToTensor(first); - imgs.pop_front(); - //Concat all images to a single tensor - for (auto& img : imgs) - { - torch::Tensor next = this->convertToTensor(img); - converted = torch::cat({ next,converted }); - } - return converted.to(this->device); -} - -cv::Mat TorchModel::convertToMat(const torch::Tensor& output, bool isFloat, bool permute, bool bgr, bool color) -{ - torch::Tensor tensor = output.clone(); - tensor = tensor.permute({ 1, 2, 0 }).contiguous(); - // if float, image is range of 0 -> 1 - tensor = (isFloat) ? tensor.mul(255).clamp(0, 255).to(torch::kU8): tensor.to(torch::kU8); - tensor = tensor.to(torch::kCPU); - int64_t height = tensor.size(0); - int64_t width = tensor.size(1); - int channels = tensor.size(2); - auto dataType = (channels == 3) ? CV_8UC3 : CV_8UC1; - cv::Mat outputMat = cv::Mat(cv::Size(width, height), dataType, tensor.data_ptr()); - if(bgr) - cv::cvtColor(outputMat, outputMat, cv::COLOR_RGB2BGR); - return outputMat.clone(); -} \ No newline at end of file diff --git a/torchExample.cpp b/torchExample.cpp deleted file mode 100644 index a157761..0000000 --- a/torchExample.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include "CRAFT.h" -#include "TorchModel.h" -#include -#include -#include "CRNN.h" -using namespace torch::indexing; -int main() -{ - torch::NoGradGuard no_grad_guard; - c10::InferenceMode guard; - CRNNModel recognition; - CraftModel detection; - - // set to mimi Tesseract - cv::setNumThreads(4); - torch::set_num_threads(4); - - std::string det = "CRAFT-detector.pt"; - std::string rec = "traced-recog.pt"; - - // Set your input image here! 
- std::string filePath = "test.jpg"; - - auto startModel = std::chrono::steady_clock::now(); - // Always check the model was loaded successully - auto check_rec = recognition.loadModel(rec.c_str()); - auto check_det = detection.loadModel(det.c_str()); - auto endModel = std::chrono::steady_clock::now(); - - auto diff = endModel - startModel; - std::cout << "MODEL TIME " << std::chrono::duration (diff).count() << " ms" << std::endl; - - //CHECK IF BOTH MODEL LOADED SUCESSFULLY - if (check_rec && check_det) - { - int runs = 1; - // Load in image into openCV Mat (bW or color) - cv::Mat matInput = detection.loadMat(filePath, false, true).clone(); - // resizes input if we need to - HeatMapRatio processed = detection.resizeAspect(matInput); - cv::Mat clone = processed.img.clone(); - cv::Mat grey = processed.img.clone(); - grey.convertTo(grey, CV_8UC1); - cv::cvtColor(grey,grey, cv::COLOR_BGR2GRAY); - torch::Tensor tempTensor = detection.convertToTensor(grey.clone(), true, false).squeeze(0); - clone.convertTo(clone, CV_8UC3); - for (int i = 0; i < runs; i++) - { - - torch::Tensor input = detection.preProcess(processed.img.clone()); - auto ss = std::chrono::high_resolution_clock::now(); - // use custom algorithm for bounding box merging - std::vector dets = detection.runDetector(input,true); - int maxWidth; - std::vector results = recognition.recognize(dets, grey,maxWidth); - auto ee = std::chrono::high_resolution_clock::now(); - auto difff = ee - ss; - int count = 0; - for (auto x : dets) - { - rectangle(clone, x.topLeft, x.bottomRight, cv::Scalar(0, 255, 0)); - putText(clone, std::to_string(count), (x.bottomRight + x.topLeft)/2, cv::FONT_HERSHEY_COMPLEX, .6, cv::Scalar(100,0, 255)); - count++; - - } - for (auto& result : results) - { - std::cout << "LOCATION: " << result.coords.topLeft << " " << result.coords.bottomRight << std::endl; - std::cout << "TEXT: " << result.text << std::endl; - std::cout << "CONFIDENCE " << result.confidence << std::endl; - std::cout << 
"################################################" << std::endl; - } - cv::imwrite("../output-heatmap.jpg", clone); - std::cout << "TOTAL INFERENCE TIME " << std::chrono::duration (difff).count() << " ms" << std::endl; - } - - } - return 0; -}