[{"data":1,"prerenderedAt":157},["ShallowReactive",2],{"project-reader-khmer-ocr-tts":3},{"id":4,"title":5,"body":6,"category":129,"date":130,"description":131,"extension":132,"featured":133,"githubUrl":134,"images":135,"liveUrl":122,"longDescription":139,"meta":140,"navigation":141,"path":142,"seo":143,"slug":144,"stem":145,"tech":146,"thumbnail":155,"__hash__":156},"projects\u002Fprojects\u002Fkhmer-ocr-tts.md","Khmer Text-to-Speech & OCR System",{"type":7,"value":8,"toc":121},"minimark",[9,13,17,22,25,29,70,74,106,110],[10,11,5],"h1",{"id":12},"khmer-text-to-speech-ocr-system",[14,15,16],"p",{},"An end-to-end Khmer OCR extraction and Text-to-Speech pipeline using transfer learning and advanced image preprocessing techniques.",[18,19,21],"h2",{"id":20},"project-overview","Project Overview",[14,23,24],{},"This project addresses the challenge of Khmer text recognition and speech synthesis under limited-resource conditions. By leveraging transfer learning and advanced preprocessing, the system achieves robust performance on real-world documents.",[18,26,28],{"id":27},"key-features","Key Features",[30,31,32,40,46,52,58,64],"ul",{},[33,34,35,39],"li",{},[36,37,38],"strong",{},"Khmer OCR",": Advanced optical character recognition optimized for Khmer script",[33,41,42,45],{},[36,43,44],{},"Text-to-Speech",": Natural speech synthesis for Khmer language",[33,47,48,51],{},[36,49,50],{},"Image Preprocessing",": Denoising, binarization, and geometric correction for enhanced OCR accuracy",[33,53,54,57],{},[36,55,56],{},"Transfer Learning",": Adapted pre-trained vision-language and speech models for Khmer",[33,59,60,63],{},[36,61,62],{},"CUDA Optimization",": GPU-accelerated training and inference for reduced latency",[33,65,66,69],{},[36,67,68],{},"RESTful API",": FastAPI-based scalable model serving",[18,71,73],{"id":72},"technical-stack","Technical Stack",[30,75,76,82,88,94,100],{},[33,77,78,81],{},[36,79,80],{},"Backend",": FastAPI for RESTful API development",[33,83,84,87],{},[36,85,86],{},"ML Framework",": PyTorch with CUDA acceleration",[33,89,90,93],{},[36,91,92],{},"Models",": HuggingFace Transformers for vision-language and speech models",[33,95,96,99],{},[36,97,98],{},"Image Processing",": OpenCV for preprocessing pipeline",[33,101,102,105],{},[36,103,104],{},"Deployment",": Optimized for production serving with low latency",[18,107,109],{"id":108},"achievements","Achievements",[30,111,112,115,118],{},[33,113,114],{},"Successfully adapted large language models for low-resource Khmer language",[33,116,117],{},"Achieved high accuracy on real-world document images",[33,119,120],{},"Built scalable API for integration with downstream applications",{"title":122,"searchDepth":123,"depth":123,"links":124},"",2,[125,126,127,128],{"id":20,"depth":123,"text":21},{"id":27,"depth":123,"text":28},{"id":72,"depth":123,"text":73},{"id":108,"depth":123,"text":109},"machine-learning","2025-12-01","End-to-end Khmer OCR extraction and Text-to-Speech pipeline using transfer learning and advanced image preprocessing","md",true,"https:\u002F\u002Fgithub.com\u002Fchhaytheanly\u002FKhmer-OCR-Tesseract",[136,137,138],"\u002Fimages\u002Fprojects\u002Fkhmer-ocr-tts-1.jpg","\u002Fimages\u002Fprojects\u002Fkhmer-ocr-tts-2.jpg","\u002Fimages\u002Fprojects\u002Fkhmer-ocr-tts-3.jpg","Designed and implemented an end-to-end Khmer OCR extraction and Text-to-Speech (TTS) pipeline under limited-resource conditions. Applied transfer learning on pre-trained vision-language and speech models (HuggingFace) to adapt them for Khmer text recognition and speech synthesis. Built advanced image preprocessing techniques (denoising, binarization, geometric correction) to enhance OCR performance on real-world documents. Optimized training and inference using PyTorch with CUDA acceleration, reducing inference latency and improving deployment efficiency. Developed RESTful APIs using FastAPI for scalable model serving and integration with downstream applications.",{},null,"\u002Fprojects\u002Fkhmer-ocr-tts",{"title":5,"description":131},"khmer-ocr-tts","projects\u002Fkhmer-ocr-tts",[147,148,149,150,151,152,153,154],"Python","FastAPI","HuggingFace Transformers","PyTorch","CUDA","OpenCV","OCR","TTS","\u002Fproject\u002Fkhmer-arn.png","JkP2c5c2mBpyY1CuUh1E5xJ1LVgo08lFUZZj6gu9PtQ",1775448379767]