tashkeel_fixer under test v1

parent 34d57666
# Hand-maintained corrections: undiacritized Arabic word -> preferred spelling
# (usually with tashkeel).  Each key appears exactly once; a dict literal
# silently keeps only the last value of a repeated key, which previously hid
# two conflicting entries ("بقاء" and "قدرة").
# Insertion order is significant because apply_fixes walks the dict in order.
custom_fixes = {
    "التكيف": "التَكَيُّف",
    "تكيف": "تَكَيُّف",
    "البقاء": "البَقَّاء",
    "بقاء": "بَقَّاء",
    # The value must not carry the definite article: the key has none.
    "قدرة": "قُدرَة",
    "نقل": "نَقْل",
    "النقل": "النَقْل",
    "دب": "دُبّ",
    "الدب": "الدُبّ",
    "نمر": "نَمِر",
    "النمر": "النَمِر",
    "فرو": "فَروُ",
    "البني": "البُنّي",
    "بني": "بُنّي",
    "ملونة": "مِلوِنَةْ",
    "قوس قزح": "قُوس قُزَح",
    "معينة": "مُعيَّنَة",
    "الفنك": "الفنِك",
    "فنك": "فنِك",
    "الحر": "الحَر",
    "حر": "حَر",
    "الشم": "الَشَمْ",
    "شم": "شَمْ",
    "البصر": "البَصَر",
    "بصر": "بَصَر",
    "الأذن": "الاُذُن",
    "أذن": "اُذُن",
    "الفم": "الفَم",
    "فم": "فَم",
    "العين": "العِين",
    "عين": "عِين",
    "اللهث": "اللَّهْث",
    "لهث": "لَّهْث",
    # The next two entries (and "حاسة" below) map a word to itself, i.e. they
    # currently apply no diacritics.
    "القطط": "القطط",
    "قطط": "قطط",
    "لنقل": "لنَقْل",
    "قدم": "قَدَمْ",
    "مية": "مَيَّةْ",
    "حاسة": "حاسة",
    "دهون": "دُهون",
}
import re
def apply_fixes(text, fixes_dict):
    """Replace whole-word occurrences of each key in *fixes_dict* within *text*.

    For every ``wrong -> fixed`` pair, a word is rewritten when it consists of
    an optional prefix (ال / يت / مت), the ``wrong`` stem, and an optional
    suffix (ات / ها / هم / ه / ي / ون / ين), delimited by regex word
    boundaries.  The prefix and suffix that were matched are kept around the
    replacement, so only the stem is swapped for ``fixed``.

    Pairs are applied one after another in the dict's iteration order, each
    on the output of the previous one.

    Args:
        text: The string to correct.
        fixes_dict: Mapping of stem to its replacement.

    Returns:
        The corrected string.
    """
    for wrong, fixed in fixes_dict.items():
        if not wrong:
            # An empty stem would match at every word boundary and inject
            # `fixed` all over the text.
            continue

        # re.escape: keys are literal words, not regex fragments.
        pattern = rf"\b(ال|يت|مت)?{re.escape(wrong)}(ات|ها|هم|ه|ي|ون|ين)?\b"

        # A callable replacement keeps `fixed` literal (no backslash/group
        # expansion); `fixed` is bound as a default to pin this iteration's
        # value.
        def replacer(m, fixed=fixed):
            prefix = m.group(1) or ""
            suffix = m.group(2) or ""
            return prefix + fixed + suffix

        text = re.sub(pattern, replacer, text)
    return text
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment