EADST

PyTorch Q4_0 quantize/dequantize, aligned with the llama.cpp Q4_0 reference implementation.

import torch

# Pick the compute device once at import time: CUDA when a GPU is available,
# otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

def q4_0_quantize_and_dequantize_tensor(tensor):
    """Simulate llama.cpp Q4_0 quantization: quantize and immediately
    dequantize ``tensor`` (a lossy round trip) and return the float16 result.

    Mirrors ``quantize_row_q4_0`` / ``dequantize_row_q4_0`` in llama.cpp:
    each block of 32 values shares one scale ``d = max / -8``, where ``max``
    is the SIGNED element of largest magnitude in the block, and each value
    is stored as a 4-bit code in [0, 15] with zero-point 8.

    Args:
        tensor: input tensor whose total element count is a multiple of 32
            (required by the per-block reshape below).

    Returns:
        A float16 tensor with the original shape containing the
        quantize -> dequantize round-trip values.
    """
    # Same device policy as the module-level selection; computed here so the
    # function is self-contained.
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor = tensor.to(dtype=torch.float32, device=dev)

    # Reshape so each 32-value block is processed independently.
    orig_shape = tensor.shape
    blocks = tensor.view(-1, 32)

    # llama.cpp keeps the SIGN of the max-magnitude element:
    #   if (amax < fabsf(v)) { amax = fabsf(v); max = v; }  d = max / -8;
    # Using abs-max alone would force d < 0 and diverge from llama.cpp for
    # blocks whose extreme value is negative.
    amax_idx = torch.argmax(torch.abs(blocks), dim=1)
    row_idx = torch.arange(blocks.shape[0], device=blocks.device)
    signed_max = blocks[row_idx, amax_idx]

    d = signed_max / -8.0
    # llama.cpp uses id = d ? 1/d : 0 — an all-zero block maps to code 8,
    # which dequantizes back to exactly 0.
    ids = torch.where(d != 0, 1.0 / d, torch.zeros_like(d))

    # Scale, shift by 8.5 and truncate to uint8: equivalent to llama.cpp's
    # (int8_t)(x*id + 8.5f) rounding into the 4-bit code range [0, 15].
    scaled = blocks * ids[:, None]
    quantized = torch.clamp(scaled + 8.5, 0, 15).to(torch.uint8)

    # Dequantize: subtract the zero-point (8) and rescale by d.
    dequantized = (quantized.float() - 8.0) * d[:, None]

    # Restore the original shape; checkpoints are saved as float16.
    return dequantized.view(orig_shape).to(dtype=torch.float16)

# Load the checkpoint state dict on CPU; tensors are moved to the compute
# device inside the quantization helper. (Plain string — the original used
# an f-string with no placeholders.)
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")

# Weight-name substrings selecting the tensors to fake-quantize: the
# attention/MLP projection matrices plus the embedding and output head.
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight"
]

for name, data in model_part.items():
    # any(...) quantizes each matching tensor exactly once; the original
    # inner loop re-ran the round trip for every keyword that matched.
    if any(word in name for word in keywords):
        model_part[name] = q4_0_quantize_and_dequantize_tensor(data)

# Persist the lossy (float16 round-tripped) checkpoint.
torch.save(model_part, "pytorch_model_quantized.bin")

Reference:

相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Paddle OpenAI mmap Video Website CEIR TSV InvalidArgumentError Bert QWEN GPTQ Sklearn CSV Miniforge Conda Python VPN 阿里云 Land Review BTC TensorFlow CTC Distillation UNIX TensorRT CAM PyTorch Hungarian 算法题 v2ray TTS PDF Pytorch Jupyter logger torchinfo C++ News API 报税 Markdown Baidu RAR Cloudreve Qwen Mixtral 云服务器 Freesound Template LeetCode Claude SQLite 递归学习法 MD5 Tensor Gemma 多线程 NameSilo Logo Password Nginx CV LaTeX 图标 版权 CC JSON Input PyCharm Permission VGG-16 Jetson 继承 Bin Datetime 音频 域名 Firewall Math GIT Agent Django Domain icon 强化学习 HaggingFace Streamlit Use Quantization Zip GPT4 Image2Text Llama 净利润 Clash Pickle YOLO Card OpenCV git SPIE Paper Search diffusers tar GGML Proxy Hilton llama.cpp 公式 Windows Docker 顶会 Numpy FP16 Food Data Linux Color WebCrawler Qwen2.5 Tiktoken Hotel 多进程 OCR VSCode FlashAttention 搞笑 RGB Bitcoin ResNet-50 Random 证件照 Statistics Plate Quantize XML Web DeepStream Google LoRA WAN Github ChatGPT FP32 Base64 v0.dev Git Magnet hf Rebuttal SVR 签证 Tracking SAM Interview Disk 飞书 GoogLeNet Breakpoint ModelScope 第一性原理 腾讯云 Pillow Transformers LLM Safetensors PDB HuggingFace scipy Animate Heatmap git-lfs 财报 Vmess Ubuntu NLP COCO Anaconda EXCEL Michelin tqdm Plotly Algorithm Shortcut 关于博主 CLAP CUDA printf Pandas Knowledge PIP Bipartite FastAPI Augmentation XGBoost Qwen2 BF16 Diagram Crawler transformers ONNX AI FP8 IndexTTS2 图形思考法 Vim DeepSeek uwsgi UI BeautifulSoup uWSGI SQL Translation Excel LLAMA Dataset FP64 Ptyhon Attention NLTK
站点统计

本站现有博文323篇,共被浏览795363

本站已经建立2493天!

热门文章
文章归档
回到顶部