EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays become (nested) lists, NumPy scalars of
            any kind (floats, integers, booleans, ...) become the matching
            Python scalar, and dicts, lists and tuples are converted
            element by element.

    Returns:
        The converted value. Anything that is not a NumPy value or one of
        the containers above is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # np.generic is the common base of every NumPy scalar type, so this
        # also covers integers and booleans, not only float32/float64.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dictionaries.

    Args:
        image_path: Path of the image file; it is read with ``cv2.imread``.
        model: Layout detector exposing ``detect(image)``. Every element of
            the returned layout must provide ``to_dict()``.

    Returns:
        A list with one dictionary per detected layout element. Each value
        of the element's ``to_dict()`` result is passed through
        ``to_serializable``.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # "'NoneType' object is not subscriptable" on the next line.
        raise ValueError(f"Could not read image: {image_path!r}")
    image = image[..., ::-1]  # Reverse the channel axis: BGR -> RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert every layout element into a dictionary of serializable values
    return [
        {key: to_serializable(value) for key, value in block.to_dict().items()}
        for block in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as a JSON document.

    Args:
        layout_data: JSON-serializable object to store.
        json_path: Destination file path; an existing file is overwritten.

    Raises:
        TypeError: If ``layout_data`` contains a value ``json`` cannot encode.
            The error is raised before the file is opened, so no empty or
            truncated file is left behind.
    """
    # Serialize first: opening the file in 'w' mode truncates it immediately,
    # so a failure during encoding must happen before that point.
    payload = json.dumps(layout_data)
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(payload)

# Load the model once at module level; process_folder() reads this global
# `model` and hands it to process_image() for every image.
model = lp.PaddleDetectionLayoutModel(
    # Model configuration referenced through LayoutParser's "lp://" scheme.
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # NOTE(review): presumably the minimum confidence for a detection to be
    # kept - confirm against the LayoutParser documentation.
    threshold=0.5,
    # Names attached to the detector's integer class ids.
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    # NOTE(review): the two flags below look like device/backend switches
    # (CPU enforcement off, MKL-DNN on) - confirm their exact effect.
    enforce_cpu=False,
    enable_mkldnn=True
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Run layout detection on every matching image below ``folder_path``.

    The folder is walked recursively with ``os.walk``. Each file whose name
    ends (case-insensitively) with one of ``extensions`` is passed to
    ``process_image`` together with the module-level ``model``, and the
    result is saved next to the image under the same name with a ``.json``
    extension.

    Args:
        folder_path: Root directory to scan.
        extensions: A file-name suffix, or an iterable of suffixes, selecting
            the files to process. The default ``('.jpg',)`` keeps the
            previous hard-coded behavior.
    """
    if isinstance(extensions, str):
        # Accept a single suffix without making callers wrap it in a tuple.
        extensions = (extensions,)
    # str.endswith needs a tuple; lower-case once to match case-insensitively.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Iterate through all files and subfolders in the folder
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file_name)
            layout_data = process_image(file_path, model)  # Process the image

            # The JSON file sits beside the image and shares its base name
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)  # Save layout data as JSON


# Specify the folder path to process.
# NOTE(review): '/your_folder_path/' looks like a placeholder - replace it with
# a real directory before running.
# These statements sit at module level, so the whole folder is processed as
# soon as this file is executed.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Pandas Markdown 云服务器 财报 图标 GoogLeNet Video Animate SQLite Plotly Excel Pickle DeepSeek transformers 腾讯云 Paper Mixtral 继承 飞书 TSV Django Bin FP16 GIT XGBoost Domain SQL Heatmap Interview Magnet Random git-lfs Vmess Jupyter QWEN CEIR Nginx Docker Color ResNet-50 Tiktoken 域名 Ptyhon DeepStream Cloudreve llama.cpp API torchinfo CV Git PyCharm BF16 Hilton AI Web CTC BTC Numpy Data TensorRT LLAMA PDF LaTeX Card Baidu Use Review Base64 YOLO Python mmap OCR Transformers Search Statistics Google Vim Qwen2 News Password 第一性原理 Translation GPTQ Streamlit Distillation UNIX Bert Breakpoint 证件照 GPT4 净利润 printf WAN ModelScope VSCode Llama Tensor FastAPI 音频 LoRA PyTorch Jetson 顶会 Disk RGB Datetime Claude WebCrawler FP32 ONNX SPIE Image2Text Zip PDB Firewall hf v2ray Website icon Proxy VGG-16 Attention MD5 Quantize HuggingFace CAM InvalidArgumentError logger TensorFlow UI RAR LLM Bitcoin CC PIP 强化学习 Crawler 图形思考法 CLAP C++ 阿里云 算法题 IndexTTS2 HaggingFace OpenCV FP8 Gemma 报税 Clash GGML Ubuntu 签证 FP64 SAM JSON CSV 多线程 XML CUDA Pillow tar Conda OpenAI Anaconda LeetCode Sklearn 多进程 Math uWSGI diffusers Plate Augmentation tqdm Linux NLP COCO Qwen2.5 关于博主 Logo VPN Dataset Safetensors Bipartite Land 公式 Shortcut Diagram Rebuttal Knowledge Tracking Input SVR Paddle Template Algorithm NLTK 版权 Github Permission Hotel NameSilo scipy Miniforge Michelin 搞笑 ChatGPT Windows Food Hungarian Quantization Freesound BeautifulSoup Qwen EXCEL TTS FlashAttention uwsgi 递归学习法 Pytorch git Agent v0.dev
站点统计

本站现有博文323篇，共被浏览795264次。

本站已经建立2493天!

热门文章
文章归档
回到顶部