tashkeel_fixer under test v1

parent 34d57666
# Hand-curated table of Arabic words (keys) and the spelling that should
# replace them (values, mostly the same word with diacritics added).
#
# Insertion order is significant for any consumer that applies the entries
# sequentially: forms carrying the definite article ("ال...") are listed
# before their bare counterparts.
#
# NOTE(review): the keys "بقاء" and "قدرة" used to appear twice in this
# literal. Python keeps the first position and the LAST value for a repeated
# key, so the earlier values ("بَقَّاء" and "القُدرَة") were silently
# discarded. The shadowed entries are removed here; the effective contents and
# iteration order are unchanged.
custom_fixes = {
    "التكيف": "التَكَيُّف",
    "البقاء": "البَقَّاء",
    # NOTE(review): doubled alef kept because it is the value that was
    # actually in effect -- confirm whether it is intentional or a typo.
    "بقاء": "بَقَّااء",
    "قدرة": "قُدرَة",
    "النقل": "النَقْل",
    "الدب": "الدُبّ",
    "النمر": "النَمِر",
    "تكيف": "تَكَيُّف",
    "نقل": "نَقْل",
    "دب": "دُبّ",
    "نمر": "نَمِر",
    "فرو": "فَروُ",
    "البني": "البُنّي",
    "بني": "بُنّي",
    "ملونة": "مِلوِنَةْ",
    "قوس قزح": "قُوس قُزَح",
    "معينة": "مُعيَّنَة",
    "الفنك": "الفنِك",
    "الحر": "الحَر",
    "الشم": "الَشَمْ",
    "البصر": "البَصَر",
    "الأذن": "الاُذُن",
    "الفم": "الفَم",
    "العين": "العِين",
    "اللهث": "اللَّهْث",
    "القطط": "القطط",
    "فنك": "فنِك",
    "حر": "حَر",
    "شم": "شَمْ",
    "بصر": "بَصَر",
    "أذن": "اُذُن",
    "فم": "فَم",
    "عين": "عِين",
    "لهث": "لَّهْث",
    "قطط": "قطط",
    "لنقل": "لنَقْل",
    "قدم": "قَدَمْ",
    "مية": "مَيَّةْ",
    "حاسة": "حاسة",
    "دهون": "دُهون",
}
import re
def apply_fixes(text: str, fixes_dict: dict) -> str:
    """Apply every replacement in *fixes_dict* to *text* and return the result.

    Entries are processed in the mapping's iteration order, each one working
    on the output of the previous one. For every ``wrong -> fixed`` pair:

    1. Every literal occurrence of ``wrong`` is replaced by ``fixed``. This is
       a plain substring replacement, so it also fires inside longer words.
    2. Any whole-word occurrence of ``wrong`` still present afterwards,
       optionally wrapped in one of the listed prefixes/suffixes, is replaced
       by ``fixed`` with the matched prefix and suffix preserved.

    Args:
        text: The string to correct.
        fixes_dict: Mapping of literal strings to their replacements. Keys
            are treated as literal text, never as regular expressions.
            Empty keys are ignored.

    Returns:
        The corrected string. *text* is returned unchanged when no key
        occurs in it.
    """
    for wrong, fixed in fixes_dict.items():
        # An empty key would make str.replace insert `fixed` between every
        # character. A key that does not occur literally cannot match the
        # (escaped) pattern below either, so both cases are skipped outright.
        if not wrong or wrong not in text:
            continue

        text = text.replace(wrong, fixed)

        # Keys are literal words: escape them so characters such as "(" or "."
        # neither break pattern compilation nor act as wildcards.
        # NOTE(review): after the substring replacement above, this pass can
        # only match when `fixed` itself re-introduces `wrong` (for example
        # identity entries), so it is close to a no-op -- confirm whether the
        # substring pass or the word-boundary pass is the intended behavior.
        pattern = rf"\b(ال|يت|مت)?{re.escape(wrong)}(ات|ها|هم|ه|ي|ون|ين)?\b"

        def replacer(m, fixed=fixed):
            # Unmatched optional groups come back as None.
            prefix = m.group(1) or ""
            suffix = m.group(2) or ""
            return prefix + fixed + suffix

        text = re.sub(pattern, replacer, text)
    return text
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment