EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays become (nested) lists, NumPy scalars
            (floats, integers, booleans, ...) become their Python
            equivalents via ``.item()``, and dicts, lists and tuples are
            converted recursively so nested NumPy values are handled too.

    Returns:
        The converted value. Anything that is not a NumPy value or one of
        the containers above is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.generic is the base class of every NumPy scalar type, so this also
    # covers np.int64, np.bool_, etc., which json cannot encode on its own.
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Run layout detection on one image and return JSON-friendly results.

    Args:
        image_path: Path of the image file to read with OpenCV.
        model: Object exposing ``detect(image)`` that returns an iterable of
            layout elements, each providing ``to_dict()``.

    Returns:
        A list with one dict per detected layout element, with NumPy values
        converted by ``to_serializable``.

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than failing later with an opaque TypeError.
    if image is None:
        raise ValueError("Could not read image: {!r}".format(image_path))
    image = image[..., ::-1]  # Convert from BGR to RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert each layout object to a dict whose values are serializable
    return [
        {key: to_serializable(value) for key, value in obj.to_dict().items()}
        for obj in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    The file is opened in text write mode, so an existing file at
    ``json_path`` is overwritten.
    """
    with open(json_path, mode='w') as out_stream:
        json.dump(layout_data, fp=out_stream)

# Load the layout-detection model once at module level. process_folder()
# reads this global `model` and passes it to process_image() for every image.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    threshold=0.5,  # presumably the minimum detection confidence -- confirm in the LayoutParser docs
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},  # class index -> label name
    enforce_cpu=False,
    enable_mkldnn=True
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Detect the layout of every matching image under ``folder_path``.

    Walks ``folder_path`` recursively. For each file whose name ends with
    one of ``extensions`` (case-insensitive), runs ``process_image`` with the
    module-level ``model`` and writes the result next to the image, using
    the same base name with a ``.json`` extension.

    Args:
        folder_path: Root directory to walk.
        extensions: A file-name suffix or an iterable of suffixes to
            process. Defaults to ``('.jpg',)``. Files that differ only in
            extension (e.g. ``a.jpg`` and ``a.jpeg``) map to the same JSON
            path, so the later one overwrites the earlier one.
    """
    # Accept a bare string as a single suffix instead of iterating its chars.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Iterate through all files and subfolders in the folder
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.lower().endswith(suffixes):
                continue
            file_path = os.path.join(root, file_name)
            layout_data = process_image(file_path, model)  # Process the image

            # Create JSON file path
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)  # Save layout data as JSON

# Specify the folder path to process.
# NOTE(review): '/your_folder_path/' looks like a placeholder -- replace it
# with a real directory before running.
# There is no `if __name__ == "__main__":` guard, so the call below also runs
# whenever this module is imported.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
算法题 Vmess v0.dev PDF Use DeepStream PDB Diagram git 多进程 Freesound Numpy Streamlit MD5 RGB OpenCV YOLO Animate 顶会 阿里云 Vim Nginx PyCharm FP8 QWEN 腾讯云 版权 Conda CV Gemma VSCode Review Random Github CEIR Algorithm TSV Template HuggingFace Base64 XML SQL 第一性原理 Card WebCrawler Llama torchinfo Password BeautifulSoup Shortcut Hotel Qwen2 SAM TensorRT mmap Baidu Video 公式 Jetson Transformers GPT4 财报 Disk Miniforge FastAPI Tracking RAR Rebuttal PyTorch Heatmap Jupyter Quantize 递归学习法 Paper Input FP32 AI ResNet-50 CAM Distillation SPIE SQLite BTC Michelin Quantization 图形思考法 Plotly FP16 Docker PIP Clash Magnet Django 继承 transformers Food Windows Math Safetensors hf NLTK IndexTTS2 UI Git v2ray Qwen 多线程 Excel Permission API Zip Bin VGG-16 Search HaggingFace TensorFlow ONNX Data InvalidArgumentError LLAMA Website Image2Text uWSGI XGBoost 证件照 搞笑 Bert 关于博主 Sklearn Plate 飞书 Google 报税 图标 Ubuntu printf ModelScope Markdown Color NameSilo Qwen2.5 FP64 BF16 云服务器 Paddle VPN Python OCR WAN CSV 签证 LeetCode Tensor EXCEL GIT CC Crawler logger Agent Mixtral Proxy DeepSeek Translation CUDA GoogLeNet GPTQ Pandas NLP 论文速读 diffusers Bitcoin git-lfs Hungarian Pickle Land Logo OpenAI Claude SVR COCO icon ChatGPT Ptyhon JSON LoRA Cloudreve Statistics Interview Dataset LaTeX LLM Pytorch 论文 Firewall llama.cpp tqdm CLAP 净利润 GGML 域名 音频 UNIX Linux News Datetime tar Anaconda Breakpoint Knowledge uwsgi 强化学习 CTC scipy C++ Hilton Domain Bipartite Attention TTS FlashAttention Tiktoken Pillow Web Augmentation
站点统计

本站现有博文327篇,共被浏览833385

本站已经建立2538天!

热门文章
文章归档
回到顶部