diff --git a/.gitignore b/.gitignore
index 5f01e718..d14f4595 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,4 @@ yarn-debug.log*
/docuseal
/ee
dump.rdb
+*.onnx
diff --git a/Dockerfile b/Dockerfile
index 1e397cf0..b0be901f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,6 +9,7 @@ RUN apk --no-cache add fontforge wget && \
wget https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansSymbols2/hinted/ttf/NotoSansSymbols2-Regular.ttf && \
wget https://github.com/Maxattax97/gnu-freefont/raw/master/ttf/FreeSans.ttf && \
wget https://github.com/impallari/DancingScript/raw/master/OFL.txt && \
+ wget -O /model.onnx "https://github.com/docusealco/fields-detection/releases/download/1.0.0/model_704_int8.onnx" && \
wget -O pdfium-linux.tgz "https://github.com/docusealco/pdfium-binaries/releases/latest/download/pdfium-linux-$(uname -m | sed 's/x86_64/x64/;s/aarch64/arm64/').tgz" && \
mkdir -p /pdfium-linux && \
tar -xzf pdfium-linux.tgz -C /pdfium-linux
@@ -50,7 +51,7 @@ ENV OPENSSL_CONF=/app/openssl_legacy.cnf
WORKDIR /app
-RUN echo '@edge https://dl-cdn.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories && apk add --no-cache sqlite-dev libpq-dev mariadb-dev vips-dev@edge yaml-dev redis libheif@edge vips-heif@edge gcompat ttf-freefont && mkdir /fonts && rm /usr/share/fonts/freefont/FreeSans.otf
+RUN apk add --no-cache sqlite-dev libpq-dev mariadb-dev vips-dev yaml-dev redis libheif vips-heif gcompat ttf-freefont && mkdir /fonts && rm /usr/share/fonts/freefont/FreeSans.otf
RUN echo $'.include = /etc/ssl/openssl.cnf\n\
\n\
@@ -66,7 +67,9 @@ activate = 1' >> /app/openssl_legacy.cnf
COPY ./Gemfile ./Gemfile.lock ./
-RUN apk add --no-cache build-base && bundle install && apk del --no-cache build-base && rm -rf ~/.bundle /usr/local/bundle/cache && ruby -e "puts Dir['/usr/local/bundle/**/{spec,rdoc,resources/shared,resources/collation,resources/locales}']" | xargs rm -rf
+RUN apk add --no-cache build-base && bundle install && apk del --no-cache build-base && rm -rf ~/.bundle /usr/local/bundle/cache && ruby -e "puts Dir['/usr/local/bundle/**/{spec,rdoc,resources/shared,resources/collation,resources/locales}']" | xargs rm -rf && ln -sf /usr/lib/libonnxruntime.so.1 $(ruby -e "print Dir[Gem::Specification.find_by_name('onnxruntime').gem_dir + '/vendor/*.so'].first")
+
+RUN echo 'https://dl-cdn.alpinelinux.org/alpine/edge/main' >> /etc/apk/repositories && echo 'https://dl-cdn.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories && apk add --no-cache onnxruntime
COPY ./bin ./bin
COPY ./app ./app
@@ -83,6 +86,7 @@ COPY --from=download /fonts/GoNotoKurrent-Regular.ttf /fonts/GoNotoKurrent-Bold.
COPY --from=download /fonts/FreeSans.ttf /usr/share/fonts/freefont
COPY --from=download /pdfium-linux/lib/libpdfium.so /usr/lib/libpdfium.so
COPY --from=download /pdfium-linux/licenses/pdfium.txt /usr/lib/libpdfium-LICENSE.txt
+COPY --from=download /model.onnx /app/tmp/model.onnx
COPY --from=webpack /app/public/packs ./public/packs
RUN ln -s /fonts /app/public/fonts
diff --git a/Gemfile b/Gemfile
index 3a704a0d..b0974208 100644
--- a/Gemfile
+++ b/Gemfile
@@ -24,7 +24,9 @@ gem 'image_processing'
gem 'jwt'
gem 'lograge'
gem 'mysql2', require: false
+gem 'numo-narray'
gem 'oj'
+gem 'onnxruntime'
gem 'pagy'
gem 'pg', require: false
gem 'premailer-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
index 84d527e6..f8f8b1ee 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -357,9 +357,18 @@ GEM
racc (~> 1.4)
nokogiri (1.18.9-x86_64-linux-musl)
racc (~> 1.4)
+ numo-narray (0.9.2.1)
oj (3.16.11)
bigdecimal (>= 3.0)
ostruct (>= 0.2)
+ onnxruntime (0.10.1)
+ ffi
+ onnxruntime (0.10.1-aarch64-linux)
+ ffi
+ onnxruntime (0.10.1-arm64-darwin)
+ ffi
+ onnxruntime (0.10.1-x86_64-linux)
+ ffi
openssl (3.3.0)
orm_adapter (0.5.0)
os (1.1.4)
@@ -638,7 +647,9 @@ DEPENDENCIES
letter_opener_web
lograge
mysql2
+ numo-narray
oj
+ onnxruntime
pagy
pg
premailer-rails
diff --git a/app/controllers/templates_debug_controller.rb b/app/controllers/templates_debug_controller.rb
index 676c2f64..edaedff0 100644
--- a/app/controllers/templates_debug_controller.rb
+++ b/app/controllers/templates_debug_controller.rb
@@ -6,12 +6,18 @@ class TemplatesDebugController < ApplicationController
DEBUG_FILE = ''
def show
- attachment = @template.documents.first
+ schema_uuids = @template.schema.index_by { |e| e['attachment_uuid'] }
+ attachment = @template.documents.find { |a| schema_uuids[a.uuid] }
data = attachment.download
- pdf = HexaPDF::Document.new(io: StringIO.new(data))
- fields = Templates::FindAcroFields.call(pdf, attachment, data)
+ unless attachment.image?
+ pdf = HexaPDF::Document.new(io: StringIO.new(data))
+
+ fields = Templates::FindAcroFields.call(pdf, attachment, data)
+ end
+
+ fields = Templates::DetectFields.call(StringIO.new(data), attachment:) if fields.blank?
attachment.metadata['pdf'] ||= {}
attachment.metadata['pdf']['fields'] = fields
diff --git a/app/controllers/templates_detect_fields_controller.rb b/app/controllers/templates_detect_fields_controller.rb
new file mode 100644
index 00000000..8355dcb2
--- /dev/null
+++ b/app/controllers/templates_detect_fields_controller.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class TemplatesDetectFieldsController < ApplicationController
+ include ActionController::Live
+
+ load_and_authorize_resource :template
+
+ def create
+ response.headers['Content-Type'] = 'text/event-stream'
+
+ sse = SSE.new(response.stream)
+
+ documents = @template.schema_documents.preload(:blob)
+
+ documents.each do |document|
+ io = StringIO.new(document.download)
+
+ Templates::DetectFields.call(io, attachment: document) do |(attachment_uuid, page, fields)|
+ sse.write({ attachment_uuid:, page:, fields: })
+ end
+ end
+
+ sse.write({ completed: true })
+ ensure
+ response.stream.close
+ end
+end
diff --git a/app/javascript/application.js b/app/javascript/application.js
index 31e28407..9b17172b 100644
--- a/app/javascript/application.js
+++ b/app/javascript/application.js
@@ -156,6 +156,7 @@ safeRegisterElement('template-builder', class extends HTMLElement {
withPhone: this.dataset.withPhone === 'true',
withVerification: ['true', 'false'].includes(this.dataset.withVerification) ? this.dataset.withVerification === 'true' : null,
withLogo: this.dataset.withLogo !== 'false',
+ withFieldsDetection: this.dataset.withFieldsDetection === 'true',
editable: this.dataset.editable !== 'false',
authenticityToken: document.querySelector('meta[name="csrf-token"]')?.content,
withPayment: this.dataset.withPayment === 'true',
diff --git a/app/javascript/template_builder/builder.vue b/app/javascript/template_builder/builder.vue
index 07abcba9..bcf376ac 100644
--- a/app/javascript/template_builder/builder.vue
+++ b/app/javascript/template_builder/builder.vue
@@ -449,6 +449,7 @@
:default-required-fields="defaultRequiredFields"
:field-types="fieldTypes"
:with-sticky-submitters="withStickySubmitters"
+ :with-fields-detection="withFieldsDetection"
:with-signature-id="withSignatureId"
:with-prefillable="withPrefillable"
:only-defined-fields="onlyDefinedFields"
@@ -618,6 +619,11 @@ export default {
required: false,
default: true
},
+ withFieldsDetection: {
+ type: Boolean,
+ required: false,
+ default: false
+ },
withAddPageButton: {
type: Boolean,
required: false,
diff --git a/app/javascript/template_builder/fields.vue b/app/javascript/template_builder/fields.vue
index 922b98dd..7b789436 100644
--- a/app/javascript/template_builder/fields.vue
+++ b/app/javascript/template_builder/fields.vue
@@ -208,6 +208,34 @@
+
+
+
{
+ return acc + doc.metadata?.pdf?.number_of_pages || doc.preview_images.length
+ }, 0)
+ },
isShowFieldSearch () {
if (this.withFieldsSearch === false) {
return false
@@ -389,6 +430,61 @@ export default {
this.$emit('set-drag', field)
},
+ detectFields () {
+ const fields = []
+
+ this.fieldPagesLoaded = 0
+
+ this.baseFetch(`/templates/${this.template.id}/detect_fields`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json'
+ }
+ }).then(async (response) => {
+ const reader = response.body.getReader()
+ const decoder = new TextDecoder('utf-8')
+ let buffer = ''
+
+ while (true) {
+ const { value, done } = await reader.read()
+
+ if (done) break
+
+ buffer += decoder.decode(value, { stream: true })
+
+ const lines = buffer.split('\n\n')
+
+ buffer = lines.pop()
+
+ for (const line of lines) {
+ if (line.startsWith('data: ')) {
+ const jsonStr = line.replace(/^data: /, '')
+ const data = JSON.parse(jsonStr)
+
+ if (data.completed) {
+ this.fieldPagesLoaded = null
+ this.template.fields = fields
+
+ break
+ } else if (data.fields) {
+ data.fields.forEach((f) => {
+ f.submitter_uuid = this.template.submitters[0].uuid
+ })
+
+ this.fieldPagesLoaded += 1
+
+ fields.push(...data.fields)
+ }
+ }
+ }
+ }
+ }).catch(error => {
+ console.error('Error in streaming message: ', error)
+ }).finally(() => {
+ this.fieldPagesLoaded = null
+ this.isFieldsLoading = false
+ })
+ },
setDragPlaceholder (event) {
this.$emit('set-drag-placeholder', {
offsetX: event.offsetX,
diff --git a/app/javascript/template_builder/i18n.js b/app/javascript/template_builder/i18n.js
index 75b450fe..a23e5c8b 100644
--- a/app/javascript/template_builder/i18n.js
+++ b/app/javascript/template_builder/i18n.js
@@ -1,5 +1,6 @@
const en = {
view: 'View',
+ autodetect_fields: 'Autodetect fields',
payment_link: 'Payment link',
strikeout: 'Strikeout',
draw_strikethrough_the_document: 'Draw strikethrough the document',
diff --git a/app/views/templates/edit.html.erb b/app/views/templates/edit.html.erb
index 79a10118..9fbc8d39 100644
--- a/app/views/templates/edit.html.erb
+++ b/app/views/templates/edit.html.erb
@@ -6,4 +6,4 @@
<%= button_to nil, user_configs_path, method: :post, params: { user_config: { key: UserConfig::SHOW_APP_TOUR, value: true } }, class: 'hidden', id: 'start_tour_button' %>
<% end %>
<% end %>
-
+
diff --git a/config/routes.rb b/config/routes.rb
index e90bd2f2..43701da1 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -100,6 +100,9 @@ Rails.application.routes.draw do
resource :debug, only: %i[show], controller: 'templates_debug' if Rails.env.development?
resources :documents, only: %i[create], controller: 'template_documents'
resources :clone_and_replace, only: %i[create], controller: 'templates_clone_and_replace'
+ if !Docuseal.multitenant? || Docuseal.demo?
+ resources :detect_fields, only: %i[create], controller: 'templates_detect_fields'
+ end
resources :restore, only: %i[create], controller: 'templates_restore'
resources :archived, only: %i[index], controller: 'templates_archived_submissions'
resources :submissions, only: %i[new create]
diff --git a/lib/pdfium.rb b/lib/pdfium.rb
index 317dddf0..464f95e2 100644
--- a/lib/pdfium.rb
+++ b/lib/pdfium.rb
@@ -25,6 +25,8 @@ class Pdfium
typedef :pointer, :FPDF_BITMAP
typedef :pointer, :FPDF_FORMHANDLE
typedef :pointer, :FPDF_TEXTPAGE
+ typedef :pointer, :FPDF_PAGEOBJECT
+ typedef :pointer, :FPDF_PATHSEGMENT
MAX_SIZE = 32_767
@@ -37,6 +39,9 @@ class Pdfium
FPDF_RENDER_FORCEHALFTONE = 0x400
FPDF_PRINTING = 0x800
+ TextNode = Struct.new(:content, :x, :y, :w, :h, keyword_init: true)
+ LineNode = Struct.new(:x, :y, :w, :h, :tilt, keyword_init: true)
+
# rubocop:disable Naming/ClassAndModuleCamelCase
class FPDF_LIBRARY_CONFIG < FFI::Struct
layout :version, :int,
@@ -77,6 +82,37 @@ class Pdfium
attach_function :FPDFText_ClosePage, [:FPDF_TEXTPAGE], :void
attach_function :FPDFText_CountChars, [:FPDF_TEXTPAGE], :int
attach_function :FPDFText_GetText, %i[FPDF_TEXTPAGE int int pointer], :int
+ # Text page functions for per-character data (code point, box, origin, font size) and text rectangles
+ attach_function :FPDFText_GetUnicode, %i[FPDF_TEXTPAGE int], :uint
+ attach_function :FPDFText_GetCharBox, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int
+ attach_function :FPDFText_GetCharOrigin, %i[FPDF_TEXTPAGE int pointer pointer], :int
+ attach_function :FPDFText_GetCharIndexAtPos, %i[FPDF_TEXTPAGE double double double double], :int
+ attach_function :FPDFText_CountRects, %i[FPDF_TEXTPAGE int int], :int
+ attach_function :FPDFText_GetRect, %i[FPDF_TEXTPAGE int pointer pointer pointer pointer], :int
+ attach_function :FPDFText_GetFontSize, %i[FPDF_TEXTPAGE int], :double
+
+ # Page object functions for extracting paths/lines
+ attach_function :FPDFPage_CountObjects, [:FPDF_PAGE], :int
+ attach_function :FPDFPage_GetObject, %i[FPDF_PAGE int], :FPDF_PAGEOBJECT
+ attach_function :FPDFPageObj_GetType, [:FPDF_PAGEOBJECT], :int
+ attach_function :FPDFPageObj_GetBounds, %i[FPDF_PAGEOBJECT pointer pointer pointer pointer], :int
+ attach_function :FPDFPath_CountSegments, [:FPDF_PAGEOBJECT], :int
+ attach_function :FPDFPath_GetPathSegment, %i[FPDF_PAGEOBJECT int], :FPDF_PATHSEGMENT
+ attach_function :FPDFPathSegment_GetType, [:FPDF_PATHSEGMENT], :int
+ attach_function :FPDFPathSegment_GetPoint, %i[FPDF_PATHSEGMENT pointer pointer], :int
+
+ # Page object types
+ FPDF_PAGEOBJ_UNKNOWN = 0
+ FPDF_PAGEOBJ_TEXT = 1
+ FPDF_PAGEOBJ_PATH = 2
+ FPDF_PAGEOBJ_IMAGE = 3
+ FPDF_PAGEOBJ_SHADING = 4
+ FPDF_PAGEOBJ_FORM = 5
+
+ # Path segment types
+ FPDF_SEGMENT_UNKNOWN = -1
+ FPDF_SEGMENT_LINETO = 0
+ FPDF_SEGMENT_BEZIERTO = 1
+ FPDF_SEGMENT_MOVETO = 2
typedef :int, :FPDF_BOOL
typedef :pointer, :IPDF_JSPLATFORM
@@ -157,6 +193,7 @@ class Pdfium
raise PdfiumError, "#{context_message}: #{error_message(error_code)} (Code: #{error_code})"
end
+ # rubocop:disable Metrics
class Document
attr_reader :document_ptr, :form_handle
@@ -386,6 +423,128 @@ class Pdfium
Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
end
+ def text_nodes
+ return @text_nodes if @text_nodes
+
+ text_page = Pdfium.FPDFText_LoadPage(page_ptr)
+ char_count = Pdfium.FPDFText_CountChars(text_page)
+
+ @text_nodes = []
+
+ return @text_nodes if char_count.zero?
+
+ char_count.times do |i|
+ unicode = Pdfium.FPDFText_GetUnicode(text_page, i)
+
+ char = [unicode].pack('U*')
+
+ left_ptr = FFI::MemoryPointer.new(:double)
+ right_ptr = FFI::MemoryPointer.new(:double)
+ bottom_ptr = FFI::MemoryPointer.new(:double)
+ top_ptr = FFI::MemoryPointer.new(:double)
+
+ result = Pdfium.FPDFText_GetCharBox(text_page, i, left_ptr, right_ptr, bottom_ptr, top_ptr)
+
+ next if result.zero?
+
+ left = left_ptr.read_double
+ right = right_ptr.read_double
+
+ origin_x_ptr = FFI::MemoryPointer.new(:double)
+ origin_y_ptr = FFI::MemoryPointer.new(:double)
+
+ Pdfium.FPDFText_GetCharOrigin(text_page, i, origin_x_ptr, origin_y_ptr)
+
+ origin_y = origin_y_ptr.read_double
+
+ font_size = Pdfium.FPDFText_GetFontSize(text_page, i)
+ font_size = 8 if font_size == 1
+
+ abs_x = left
+ abs_y = height - origin_y - (font_size * 0.8)
+ abs_width = right - left
+ abs_height = font_size
+
+ x = abs_x / width
+ y = abs_y / height
+ node_width = abs_width / width
+ node_height = abs_height / height
+
+ @text_nodes << TextNode.new(content: char, x: x, y: y, w: node_width, h: node_height)
+ end
+
+ @text_nodes = @text_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
+ ensure
+ Pdfium.FPDFText_ClosePage(text_page) if text_page && !text_page.null?
+ end
+
+ def line_nodes
+ return @line_nodes if @line_nodes
+
+ ensure_not_closed!
+
+ @line_nodes = []
+
+ object_count = Pdfium.FPDFPage_CountObjects(page_ptr)
+
+ return @line_nodes if object_count.zero?
+
+ object_count.times do |i|
+ page_object = Pdfium.FPDFPage_GetObject(page_ptr, i)
+
+ next if page_object.null?
+
+ obj_type = Pdfium.FPDFPageObj_GetType(page_object)
+
+ next unless obj_type == Pdfium::FPDF_PAGEOBJ_PATH
+
+ left_ptr = FFI::MemoryPointer.new(:float)
+ bottom_ptr = FFI::MemoryPointer.new(:float)
+ right_ptr = FFI::MemoryPointer.new(:float)
+ top_ptr = FFI::MemoryPointer.new(:float)
+
+ Pdfium.FPDFPageObj_GetBounds(page_object, left_ptr, bottom_ptr, right_ptr, top_ptr)
+
+ obj_left = left_ptr.read_float
+ obj_bottom = bottom_ptr.read_float
+ obj_right = right_ptr.read_float
+ obj_top = top_ptr.read_float
+
+ obj_width = obj_right - obj_left
+ obj_height = obj_top - obj_bottom
+
+ next if obj_width < 1 && obj_height < 1
+
+ segment_count = Pdfium.FPDFPath_CountSegments(page_object)
+
+ next if segment_count < 2
+
+ next unless segment_count <= 10 && (obj_height < 10 || obj_width < 10)
+
+ if obj_width > obj_height && obj_height < 10
+ tilt = 0
+ elsif obj_height > obj_width && obj_width < 10
+ tilt = 90
+ else
+ next
+ end
+
+ x = obj_left
+ y = obj_bottom
+ w = obj_width
+ h = obj_height
+
+ norm_x = x / width
+ norm_y = (height - y - h) / height
+ norm_w = w / width
+ norm_h = h / height
+
+ @line_nodes << LineNode.new(x: norm_x, y: norm_y, w: norm_w, h: norm_h, tilt: tilt)
+ end
+
+ @line_nodes = @line_nodes.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
+ end
+
def close
return if closed?
@@ -445,4 +604,5 @@ class Pdfium
at_exit do
cleanup_library
end
+ # rubocop:enable Metrics
end
diff --git a/lib/templates/detect_fields.rb b/lib/templates/detect_fields.rb
new file mode 100755
index 00000000..20bb9e3f
--- /dev/null
+++ b/lib/templates/detect_fields.rb
@@ -0,0 +1,264 @@
+# frozen_string_literal: true
+
+module Templates
+ module DetectFields
+ module_function
+
+ TextFieldBox = Struct.new(:x, :y, :w, :h, keyword_init: true)
+
+ # rubocop:disable Metrics
+ def call(io, attachment: nil, confidence: 0.3, temperature: 1,
+ nms: 0.1, split_page: false, aspect_ratio: true, padding: 20, &)
+ if attachment&.image?
+ process_image_attachment(io, attachment:, confidence:, nms:, split_page:,
+ temperature:, aspect_ratio:, padding:, &)
+ else
+ process_pdf_attachment(io, attachment:, confidence:, nms:, split_page:,
+ temperature:, aspect_ratio:, padding:, &)
+ end
+ end
+
+ def process_image_attachment(io, attachment:, confidence:, nms:, temperature: 1,
+ split_page: false, aspect_ratio: false, padding: nil)
+ image = Vips::Image.new_from_buffer(io.read, '')
+
+ fields = Templates::ImageToFields.call(image, confidence:, nms:, split_page:,
+ temperature:, aspect_ratio:, padding:)
+
+ fields = fields.map do |f|
+ {
+ uuid: SecureRandom.uuid,
+ type: f.type,
+ required: true,
+ preferences: {},
+ areas: [{
+ x: f.x,
+ y: f.y,
+ w: f.w,
+ h: f.h,
+ page: 0,
+ attachment_uuid: attachment&.uuid
+ }]
+ }
+ end
+
+ yield [attachment&.uuid, 0, fields] if block_given?
+
+ fields
+ end
+
+ def process_pdf_attachment(io, attachment:, confidence:, nms:, temperature: 1,
+ split_page: false, aspect_ratio: false, padding: nil)
+ doc = Pdfium::Document.open_bytes(io.read)
+
+ doc.page_count.times.flat_map do |page_number|
+ page = doc.get_page(page_number)
+
+ data, width, height = page.render_to_bitmap(width: ImageToFields::RESOLUTION * 1.5)
+
+ image = Vips::Image.new_from_memory(data, width, height, 4, :uchar)
+
+ fields = Templates::ImageToFields.call(image, confidence: 0.05, nms:, split_page:,
+ temperature:, aspect_ratio:, padding:)
+
+ text_fields = extract_text_fields_from_page(page)
+ line_fields = extract_line_fields_from_page(page)
+
+ fields = increase_confidence_for_overlapping_fields(fields, text_fields)
+ fields = increase_confidence_for_overlapping_fields(fields, line_fields)
+
+ fields = fields.filter_map do |f|
+ next if f.confidence < confidence
+
+ {
+ uuid: SecureRandom.uuid,
+ type: f.type,
+ required: true,
+ preferences: {},
+ areas: [{
+ x: f.x, y: f.y,
+ w: f.w, h: f.h,
+ page: page_number,
+ attachment_uuid: attachment&.uuid
+ }]
+ }
+ end
+
+ yield [attachment&.uuid, page_number, fields] if block_given?
+
+ fields
+ end
+ end
+
+ def extract_line_fields_from_page(page)
+ line_thickness = 5.0 / page.height
+
+ vertical_lines, all_horizontal_lines = page.line_nodes.partition { |line| line.tilt == 90 }
+
+ horizontal_lines = all_horizontal_lines.reject do |h_line|
+ next true if h_line.w > 0.7 && (h_line.h < 0.1 || h_line.h < 0.9)
+
+ next false if vertical_lines.blank?
+
+ h_x_min = h_line.x
+ h_x_max = h_line.x + h_line.w
+ h_y_avg = h_line.y + (h_line.h / 2)
+
+ vertical_lines.any? do |v_line|
+ v_x_avg = v_line.x + (v_line.w / 2)
+ v_y_min = v_line.y
+ v_y_max = v_line.y + v_line.h
+
+ h_x_min_expanded = h_x_min - line_thickness
+ h_x_max_expanded = h_x_max + line_thickness
+ h_y_min_expanded = h_y_avg - line_thickness
+ h_y_max_expanded = h_y_avg + line_thickness
+
+ v_x_min_expanded = v_x_avg - line_thickness
+ v_x_max_expanded = v_x_avg + line_thickness
+ v_y_min_expanded = v_y_min - line_thickness
+ v_y_max_expanded = v_y_max + line_thickness
+
+ x_overlap = v_x_min_expanded <= h_x_max_expanded && v_x_max_expanded >= h_x_min_expanded
+ y_overlap = h_y_min_expanded <= v_y_max_expanded && h_y_max_expanded >= v_y_min_expanded
+
+ x_overlap && y_overlap
+ end
+ end
+
+ node_index = 0
+
+ horizontal_lines = horizontal_lines.reject do |line|
+ nodes = []
+
+ loop do
+ node = page.text_nodes[node_index += 1]
+
+ break unless node
+
+ break if node.y > line.y
+
+ next if node.x + node.w < line.x || line.x + line.w < node.x ||
+ node.y + node.h < line.y - node.h || line.y < node.y
+
+ nodes << node
+
+ next if nodes.blank?
+
+ next_node = page.text_nodes[node_index + 1]
+
+ break if next_node.x + next_node.w < line.x || line.x + line.w < next_node.x ||
+ next_node.y + next_node.h < line.y - next_node.h || line.y < next_node.y
+ end
+
+ next if nodes.blank?
+
+ width = nodes.last.x + nodes.last.w - nodes.first.x
+
+ next true if width > line.w / 2.0
+ end
+
+ horizontal_lines.each do |line|
+ line.h += 4 * line_thickness
+ line.y -= 4 * line_thickness
+ end
+ end
+
+ def extract_text_fields_from_page(page)
+ text_nodes = page.text_nodes
+
+ field_boxes = []
+
+ i = 0
+
+ while i < text_nodes.length
+ node = text_nodes[i]
+
+ next i += 1 if node.content != '_'
+
+ x1 = node.x
+ y1 = node.y
+ x2 = node.x + node.w
+ y2 = node.y + node.h
+
+ underscore_count = 1
+
+ j = i + 1
+
+ while j < text_nodes.length
+ next_node = text_nodes[j]
+
+ break unless next_node.content == '_'
+
+ distance = next_node.x - x2
+ height_diff = (next_node.y - y1).abs
+
+ break if distance > 0.02 || height_diff > node.h * 0.5
+
+ underscore_count += 1
+ next_x2 = next_node.x + next_node.w
+ next_y2 = next_node.y + next_node.h
+
+ x2 = next_x2
+ y2 = [y2, next_y2].max
+ y1 = [y1, next_node.y].min
+
+ j += 1
+ end
+
+ field_boxes << TextFieldBox.new(x: x1, y: y1, w: x2 - x1, h: y2 - y1) if underscore_count >= 2
+
+ i = j
+ end
+
+ field_boxes
+ end
+
+ def calculate_iou(box1, box2)
+ x1 = [box1.x, box2.x].max
+ y1 = [box1.y, box2.y].max
+ x2 = [box1.x + box1.w, box2.x + box2.w].min
+ y2 = [box1.y + box1.h, box2.y + box2.h].min
+
+ intersection_width = [0, x2 - x1].max
+ intersection_height = [0, y2 - y1].max
+ intersection_area = intersection_width * intersection_height
+
+ return 0.0 if intersection_area.zero?
+
+ box1_area = box1.w * box1.h
+ box2_area = box2.w * box2.h
+ union_area = box1_area + box2_area - intersection_area
+
+ intersection_area / union_area
+ end
+
+ def boxes_overlap?(box1, box2)
+ !(box1.x + box1.w < box2.x || box2.x + box2.w < box1.x ||
+ box1.y + box1.h < box2.y || box2.y + box2.h < box1.y)
+ end
+
+ def increase_confidence_for_overlapping_fields(image_fields, text_fields, by: 1.0)
+ return image_fields if text_fields.blank?
+
+ image_fields.map do |image_field|
+ next if image_field.type != 'text'
+
+ field_bottom = image_field.y + image_field.h
+
+ text_fields.each do |text_field|
+ break if text_field.y > field_bottom
+
+ next if text_field.y + text_field.h < image_field.y
+
+ next unless boxes_overlap?(image_field, text_field) && calculate_iou(image_field, text_field) > 0.5
+
+ break image_field.confidence += by
+ end
+ end
+
+ image_fields
+ end
+ # rubocop:enable Metrics
+ end
+end
diff --git a/lib/templates/image_to_fields.rb b/lib/templates/image_to_fields.rb
new file mode 100755
index 00000000..ad747aff
--- /dev/null
+++ b/lib/templates/image_to_fields.rb
@@ -0,0 +1,331 @@
+# frozen_string_literal: true
+
+module Templates
+ module ImageToFields
+ module_function
+
+ # Single detection; x/y/w/h are fractions of the source image size.
+ Field = Struct.new(:type, :x, :y, :w, :h, :confidence, keyword_init: true)
+
+ # Must match the location the Docker image copies the downloaded model to (/app/tmp/model.onnx).
+ MODEL_PATH = Rails.root.join('tmp/model.onnx')
+
+ # Side length, in pixels, of the square model input.
+ RESOLUTION = 704
+
+ # Maps the predicted class index to a field type.
+ ID_TO_CLASS = %w[text checkbox].freeze
+
+ # Per-channel normalization constants applied in preprocess_image.
+ MEAN = [0.485, 0.456, 0.406].freeze
+ STD = [0.229, 0.224, 0.225].freeze
+
+ CPU_THREADS = Etc.nprocessors
+
+ # rubocop:disable Metrics
+ def call(image, confidence: 0.3, nms: 0.1, temperature: 1,
+ split_page: false, aspect_ratio: true, padding: nil)
+ base_image = image.extract_band(0, n: 3)
+
+ trimmed_base, base_offset_x, base_offset_y = trim_image_with_padding(base_image, padding)
+
+ if split_page && image.height > image.width
+ half_h = trimmed_base.height / 2
+ top_h = half_h
+ bottom_h = trimmed_base.height - half_h
+
+ regions = [
+ { img: trimmed_base.crop(0, 0, trimmed_base.width, top_h), offset_y: 0 },
+ { img: trimmed_base.crop(0, top_h, trimmed_base.width, bottom_h), offset_y: top_h }
+ ]
+
+ detections = { xyxy: Numo::SFloat[], confidence: Numo::SFloat[], class_id: Numo::Int32[] }
+
+ detections = regions.reduce(detections) do |acc, r|
+ next detections if r[:img].height <= 0 || r[:img].width <= 0
+
+ input_tensor, transform_info = preprocess_image(r[:img], RESOLUTION, aspect_ratio:)
+
+ transform_info[:trim_offset_x] = base_offset_x
+ transform_info[:trim_offset_y] = base_offset_y + r[:offset_y]
+
+ outputs = model.predict({ 'input' => input_tensor })
+
+ postprocess_outputs(outputs, transform_info, acc, confidence:, temperature:)
+ end
+ else
+ input_tensor, transform_info = preprocess_image(trimmed_base, RESOLUTION, aspect_ratio:)
+
+ transform_info[:trim_offset_x] = base_offset_x
+ transform_info[:trim_offset_y] = base_offset_y
+
+ outputs = model.predict({ 'input' => input_tensor })
+
+ detections = postprocess_outputs(outputs, transform_info, confidence:, temperature:)
+ end
+
+ detections = apply_nms(detections, nms)
+
+ fields = Array.new(detections[:xyxy].shape[0]) do |i|
+ x1 = detections[:xyxy][i, 0]
+ y1 = detections[:xyxy][i, 1]
+ x2 = detections[:xyxy][i, 2]
+ y2 = detections[:xyxy][i, 3]
+
+ class_id = detections[:class_id][i].to_i
+
+ confidence = detections[:confidence][i]
+
+ x0_norm = x1 / image.width.to_f
+ y0_norm = y1 / image.height.to_f
+ x1_norm = x2 / image.width.to_f
+ y1_norm = y2 / image.height.to_f
+
+ type_name = ID_TO_CLASS[class_id]
+
+ Field.new(
+ type: type_name,
+ x: x0_norm,
+ y: y0_norm,
+ w: (x1_norm - x0_norm),
+ h: (y1_norm - y0_norm),
+ confidence:
+ )
+ end
+
+ sort_fields(fields, y_threshold: 10.0 / image.height)
+ end
+
+ def trim_image_with_padding(image, padding = 0)
+ return [image, 0, 0] if padding.nil?
+
+ left, top, trim_width, trim_height = image.find_trim(threshold: 10, background: [255, 255, 255])
+
+ padded_left = [left - padding, 0].max
+ padded_top = [top - padding, 0].max
+ padded_right = [left + trim_width + padding, image.width].min
+ padded_bottom = [top + trim_height + padding, image.height].min
+
+ width = padded_right - padded_left
+ height = padded_bottom - padded_top
+
+ trimmed_image = image.crop(padded_left, padded_top, width, height)
+
+ [trimmed_image, padded_left, padded_top]
+ end
+
+ def preprocess_image(image, resolution, aspect_ratio: false)
+ scale_x = resolution.to_f / image.width
+ scale_y = resolution.to_f / image.height
+
+ if aspect_ratio
+ scale = [scale_x, scale_y].min
+
+ new_width = (image.width * scale).round
+ new_height = (image.height * scale).round
+
+ resized = image.resize(scale, vscale: scale, kernel: :lanczos3)
+
+ pad_x = ((resolution - new_width) / 2.0).round
+ pad_y = ((resolution - new_height) / 2.0).round
+
+ image = resized.embed(pad_x, pad_y, resolution, resolution, background: [255, 255, 255])
+
+ transform_info = { scale_x: scale, scale_y: scale, pad_x: pad_x, pad_y: pad_y }
+ else
+ image = image.resize(scale_x, vscale: scale_y, kernel: :lanczos3)
+
+ transform_info = { scale_x: scale_x, scale_y: scale_y, pad_x: 0, pad_y: 0 }
+ end
+
+ image /= 255.0
+
+ image = (image - MEAN) / STD
+
+ pixel_data = image.write_to_memory
+
+ img_array = Numo::SFloat.from_binary(pixel_data, [resolution, resolution, 3])
+
+ img_array = img_array.transpose(2, 0, 1)
+
+ [img_array.reshape(1, 3, resolution, resolution), transform_info]
+ end
+
+ # Greedy non-maximum suppression over all boxes, regardless of class.
+ #
+ # boxes         - Numo array whose columns 0..3 are read as x1, y1, x2, y2
+ # scores        - Numo array with one score per box
+ # iou_threshold - remaining boxes whose IoU with a kept box exceeds this value are dropped
+ #
+ # Returns a Numo::Int32 array with the indices of the boxes to keep.
+ def nms(boxes, scores, iou_threshold = 0.5)
+ return Numo::Int32[] if boxes.shape[0].zero?
+
+ x1 = boxes[true, 0]
+ y1 = boxes[true, 1]
+ x2 = boxes[true, 2]
+ y2 = boxes[true, 3]
+
+ areas = (x2 - x1) * (y2 - y1)
+ # Box indices ordered from highest to lowest score.
+ order = scores.sort_index.reverse
+
+ keep = []
+
+ while order.size.positive?
+ # The best remaining box is always kept.
+ i = order[0]
+ keep << i
+
+ break if order.size == 1
+
+ # Intersection rectangle between the kept box and every other remaining box.
+ xx1 = Numo::SFloat.maximum(x1[i], x1[order[1..]])
+ yy1 = Numo::SFloat.maximum(y1[i], y1[order[1..]])
+ xx2 = Numo::SFloat.minimum(x2[i], x2[order[1..]])
+ yy2 = Numo::SFloat.minimum(y2[i], y2[order[1..]])
+
+ w = Numo::SFloat.maximum(0.0, xx2 - xx1)
+ h = Numo::SFloat.maximum(0.0, yy2 - yy1)
+
+ intersection = w * h
+
+ iou = intersection / (areas[i] + areas[order[1..]] - intersection)
+
+ # Positions (relative to order[1..]) of the boxes that survive this round.
+ inds = iou.le(iou_threshold).where
+
+ # +1 converts those positions back into positions within `order`.
+ order = order[inds + 1]
+ end
+
+ Numo::Int32.cast(keep)
+ end
+
+ # Converts raw model outputs into detections in source-image pixel coordinates.
+ #
+ # outputs        - model result; the 'dets' and 'labels' entries are read
+ # transform_info - hash with :pad_x, :pad_y, :scale_x, :scale_y, :trim_offset_x
+ #                  and :trim_offset_y describing how the input was prepared
+ # detections     - optional previous result; new detections are appended to it
+ # confidence     - detections with a score not greater than this are discarded
+ # temperature    - divisor applied to the logits before the sigmoid
+ #
+ # Returns a hash with :xyxy, :confidence and :class_id Numo arrays.
+ def postprocess_outputs(outputs, transform_info, detections = nil, confidence: 0.3, temperature: 1)
+ boxes = Numo::SFloat.cast(outputs['dets'])
+ logits = Numo::SFloat.cast(outputs['labels'])
+
+ # Drop the leading batch dimension.
+ boxes = boxes[0, true, true] # [300, 4]
+ logits = logits[0, true, true] # [300, num_classes]
+
+ scaled_logits = logits / temperature
+
+ # Element-wise sigmoid.
+ probs = 1.0 / (1.0 + Numo::NMath.exp(-scaled_logits))
+
+ # Best class and its probability for every box.
+ scores = probs.max(axis: 1)
+ labels = probs.argmax(axis: 1)
+
+ # Box columns are read as centre x, centre y, width, height.
+ cx = boxes[true, 0]
+ cy = boxes[true, 1]
+ w = boxes[true, 2]
+ h = boxes[true, 3]
+
+ x1 = cx - (w / 2.0)
+ y1 = cy - (h / 2.0)
+ x2 = cx + (w / 2.0)
+ y2 = cy + (h / 2.0)
+
+ boxes_xyxy = Numo::SFloat.zeros(boxes.shape[0], 4)
+ boxes_xyxy[true, 0] = x1
+ boxes_xyxy[true, 1] = y1
+ boxes_xyxy[true, 2] = x2
+ boxes_xyxy[true, 3] = y2
+
+ # NOTE(review): presumably the model emits coordinates relative to the input size - confirm.
+ boxes_xyxy *= RESOLUTION
+
+ # Undo the preprocessing in reverse order: padding, scaling, then trimming.
+ pad_x = transform_info[:pad_x]
+ pad_y = transform_info[:pad_y]
+ boxes_xyxy[true, 0] -= pad_x
+ boxes_xyxy[true, 1] -= pad_y
+ boxes_xyxy[true, 2] -= pad_x
+ boxes_xyxy[true, 3] -= pad_y
+
+ scale_x = transform_info[:scale_x]
+ scale_y = transform_info[:scale_y]
+ boxes_xyxy[true, 0] /= scale_x
+ boxes_xyxy[true, 1] /= scale_y
+ boxes_xyxy[true, 2] /= scale_x
+ boxes_xyxy[true, 3] /= scale_y
+
+ trim_offset_x = transform_info[:trim_offset_x]
+ trim_offset_y = transform_info[:trim_offset_y]
+ boxes_xyxy[true, 0] += trim_offset_x
+ boxes_xyxy[true, 1] += trim_offset_y
+ boxes_xyxy[true, 2] += trim_offset_x
+ boxes_xyxy[true, 3] += trim_offset_y
+
+ keep_mask = scores.gt(confidence)
+
+ keep_indices = keep_mask.where
+
+ if keep_indices.empty?
+ # Nothing passed the threshold: hand back the previous detections or an empty set.
+ detections || {
+ xyxy: Numo::SFloat[],
+ confidence: Numo::SFloat[],
+ class_id: Numo::Int32[]
+ }
+ else
+ scores = scores[keep_indices]
+ labels = labels[keep_indices]
+ boxes_xyxy = boxes_xyxy[keep_indices, true]
+
+ if detections
+ # Append the new detections after the existing ones in freshly allocated arrays.
+ existing_n = detections[:xyxy].shape[0]
+ new_n = boxes_xyxy.shape[0]
+ total = existing_n + new_n
+
+ xyxy = Numo::SFloat.zeros(total, 4)
+ conf = Numo::SFloat.zeros(total)
+ cls = Numo::Int32.zeros(total)
+
+ if existing_n.positive?
+ xyxy[0...existing_n, true] = detections[:xyxy]
+ conf[0...existing_n] = detections[:confidence]
+ cls[0...existing_n] = detections[:class_id]
+ end
+
+ xyxy[existing_n...(existing_n + new_n), true] = boxes_xyxy
+ conf[existing_n...(existing_n + new_n)] = scores
+ cls[existing_n...(existing_n + new_n)] = Numo::Int32.cast(labels)
+
+ { xyxy: xyxy, confidence: conf, class_id: cls }
+ else
+ {
+ xyxy: boxes_xyxy,
+ confidence: scores,
+ class_id: Numo::Int32.cast(labels)
+ }
+ end
+ end
+ end
+
+ def sort_fields(fields, y_threshold: 0.01)
+ sorted_fields = fields.sort { |a, b| a.y == b.y ? a.x <=> b.x : a.y <=> b.y }
+
+ lines = []
+ current_line = []
+
+ sorted_fields.each do |field|
+ if current_line.blank? || (field.y - current_line.first.y).abs < y_threshold
+ current_line << field
+ else
+ lines << current_line.sort_by(&:x)
+
+ current_line = [field]
+ end
+ end
+
+ lines << current_line.sort_by(&:x) if current_line.present?
+
+ lines.flatten
+ end
+
+ def apply_nms(detections, threshold = 0.5)
+ return detections if detections[:xyxy].shape[0].zero?
+
+ keep_indices = nms(detections[:xyxy], detections[:confidence], threshold)
+
+ {
+ xyxy: detections[:xyxy][keep_indices, true],
+ confidence: detections[:confidence][keep_indices],
+ class_id: detections[:class_id][keep_indices]
+ }
+ end
+
+ def model
+ @model ||= OnnxRuntime::Model.new(
+ MODEL_PATH.to_s,
+ inter_op_num_threads: CPU_THREADS,
+ intra_op_num_threads: CPU_THREADS,
+ enable_mem_pattern: false,
+ enable_cpu_mem_arena: false,
+ providers: ['CPUExecutionProvider']
+ )
+ end
+ # rubocop:enable Metrics
+ end
+end