EADST

PyTorch Q4_1 quantization and dequantization, aligned with llama.cpp

import torch

# Select the compute device: prefer the GPU when one is present.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

def q4_1_quantize_and_dequantize_tensor(tensor, block_size=32):
    """Simulate llama.cpp Q4_1 quantization by round-tripping *tensor*.

    Q4_1 stores each block of ``block_size`` values as 4-bit unsigned
    integers plus a per-block scale ``d`` and minimum ``min`` so that
    ``x ~= q * d + min`` with ``q`` in [0, 15].

    Args:
        tensor: Input tensor; its number of elements must be a multiple
            of ``block_size``.
        block_size: Values per quantization block (llama.cpp uses 32).

    Returns:
        A float16 tensor of the original shape holding the dequantized
        (lossy) values.

    Raises:
        ValueError: If ``tensor.numel()`` is not divisible by ``block_size``.
    """
    # Resolve the device locally so the function does not depend on a
    # module-level global.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor = tensor.to(dtype=torch.float32, device=device)

    if tensor.numel() % block_size != 0:
        raise ValueError(
            f"tensor has {tensor.numel()} elements, "
            f"which is not a multiple of block_size={block_size}"
        )

    # Reshape so each row is one quantization block, processed independently.
    orig_shape = tensor.shape
    tensor = tensor.view(-1, block_size)

    # Per-block min and max define the quantization range.
    min_vals = torch.min(tensor, dim=1)[0]
    max_vals = torch.max(tensor, dim=1)[0]

    # Scale d maps the 16 quantization levels (2**4) onto [min, max].
    d = (max_vals - min_vals) / (2**4 - 1)
    d[d == 0] = 1.0  # Constant blocks: avoid division by zero (q becomes 0).

    # Inverse scale used for quantization.
    ids = 1.0 / d

    # Quantize: shift by the block min, scale to [0, 15], then round to
    # nearest via +0.5 followed by truncation (matches llama.cpp).
    quantized = (tensor - min_vals[:, None]) * ids[:, None]
    quantized = torch.clamp(quantized + 0.5, 0, 15).to(torch.uint8)

    # Dequantize back to floats: x ~= q * d + min.
    dequantized = (quantized.float() * d[:, None]) + min_vals[:, None]

    # Restore the caller's shape; return half precision like model weights.
    return dequantized.view(orig_shape).to(dtype=torch.float16)

# Assuming 'model_part' is already loaded and on CPU.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load
# checkpoints from a trusted source.
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")

# Weight-name fragments selecting the tensors to push through Q4_1.
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight"
]
for name, data in model_part.items():
    # Quantize/dequantize each matching tensor exactly once (the original
    # inner loop redid the work for every keyword that matched the name).
    if any(word in name for word in keywords):
        model_part[name] = q4_1_quantize_and_dequantize_tensor(data)

# Save the updated (lossy) model parts.
torch.save(model_part, "pytorch_model_quantized.bin")

Reference:

相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
OpenAI Excel Logo icon LLAMA OCR ChatGPT FP16 CSV OpenCV 版权 logger Baidu 继承 C++ RAR hf Animate Augmentation llama.cpp Breakpoint HuggingFace MD5 Bipartite API mmap Numpy PyCharm WAN 多进程 BF16 CAM TTS Safetensors LoRA SQL Pandas Streamlit Ubuntu CUDA Quantize Bert Color NameSilo Tiktoken SAM Proxy Review Random Website IndexTTS2 Jetson Zip Statistics Qwen UI News Nginx Attention Translation Tensor v2ray LeetCode 腾讯云 ResNet-50 Shortcut Plate 报税 diffusers SQLite git uwsgi Interview Search XML Paddle XGBoost 阿里云 HaggingFace GPT4 Docker Vim PDB Tracking Web SVR Transformers Bin 递归学习法 Django BTC Heatmap 云服务器 Data Card Image2Text Hilton 图形思考法 净利润 Sklearn 财报 Plotly YOLO GIT Quantization Template 公式 Land DeepStream printf CC PDF Use Michelin Google Food BeautifulSoup git-lfs FP8 VPN VGG-16 GoogLeNet CV Cloudreve CLAP NLP Diagram 顶会 强化学习 DeepSeek Pickle Password Mixtral Miniforge AI Crawler QWEN torchinfo Permission 证件照 多线程 Qwen2.5 COCO Windows TSV FastAPI transformers EXCEL Math 飞书 Jupyter CEIR Input 算法题 Base64 Llama Hotel Dataset 关于博主 Gemma VSCode Domain ONNX Agent 签证 TensorFlow v0.dev InvalidArgumentError Claude 图标 Clash CTC Knowledge Distillation 音频 LaTeX uWSGI Qwen2 Video Rebuttal SPIE Datetime RGB Vmess Ptyhon TensorRT Freesound 域名 GPTQ Paper Bitcoin JSON Magnet FP32 Github PIP Disk Algorithm Hungarian tqdm 搞笑 Linux LLM Conda Markdown Anaconda Python Pytorch PyTorch UNIX 第一性原理 Firewall GGML NLTK Pillow FP64 ModelScope WebCrawler scipy FlashAttention Git tar
站点统计

本站现有博文323篇,共被浏览795391

本站已经建立2493天!

热门文章
文章归档
回到顶部