taskeel fixer v2

parent c79b6813
...@@ -22,20 +22,68 @@ custom_fixes = { ...@@ -22,20 +22,68 @@ custom_fixes = {
"لنقل": "لنَقْل", "لنقل": "لنَقْل",
"قدم": "قَدَمْ", "قدم": "قَدَمْ",
"مية": "مَيَّةْ", "مية": "مَيَّةْ",
"حاسة": "حاسة", "حاسِّة": "حاسة",
"دهون": "دُهون", "دهون": "دُهون",
"طاقة": "طاقّةْ",
"اللي": "اللِّي",
"نزل": "نِزْل",
"شفت": "شُفْت",
"تل": "تَل",
"مخزن": "مَخزَّن",
"زنبرك": "زُنبرُك",
"ايه": "إِيه",
"أيه": "إِيه",
"خَلِية": "خلية",
"بيضة": "بِيضةْ",
"جنين": "جَنين",
"نسل": "نَسْل",
"ذكر": "ذَكَر",
"جوا": "جُوَّا",
"قولي": "قُولِّي",
"زي": "زَيّ",
"يخصب": "يِخَصَب",
"يخرج": "يِخرُج",
"تكاثر": "تَكاثُر",
"بابا": "بَابَا",
"ماما": "مَامَا",
"اساعد": "أَساعد",
"اكسجين": "اُكسُچين",
"وصل": "وِصِل",
"ولد": "وَلَد",
"انثى": "أُنثى",
"الميكروسكوب": "المايكروسكوب",
"تندمج": "تَنْدَمِج",
"توضيح": "تَوْضيح",
"الشعر": "الشَعر",
"زرقاء": "زَرْقاء",
"اشقر": "أَشْقَر",
"تكبر": "تِكبر",
"حلقة": "حَلَقَةْ",
"حبة": "حَبَةْ",
"يربط": "يِرْبُط",
"كيس": "كِيس",
"مرن": "مَرِن",
"نتناول": "نَتَنَاوَل",
"ننتقل": "تَنَتَقِل",
"قصة": "قِصةْ",
"تختبر": "تَخَتَبِر",
"إزيك": "إزَيَّك",
} }
import re import re
def apply_fixes(text, fixes_dict):
    """Replace known mis-vocalized words in *text* with corrected spellings.

    Each key of *fixes_dict* is searched for as a whole word, optionally
    preceded by one of the known prefixes and followed by one of the known
    suffixes. The key is replaced by its value; the matched prefix and
    suffix are kept unchanged around it. Entries are applied one after
    another in the mapping's iteration order, so a later entry operates on
    the output of the earlier ones.

    Args:
        text: The string to correct.
        fixes_dict: Mapping from the spelling to look for to its
            replacement. Keys are matched as literal text, not as regular
            expressions. Empty keys are ignored.

    Returns:
        The corrected string.
    """
    prefixes = "(ال|يت|مت|بـ|بي|بن)?"
    suffixes = "(ات|ها|هم|ه|ي|ون|ين|ة|ً)?"

    # `\b` is defined through `\w`, and `\w` does not include Arabic
    # diacritics (combining marks). With `\b`, a "word boundary" therefore
    # appears in the middle of any vocalized word, letting a key match a
    # fragment of a longer word. Treat the diacritics U+064B-U+065F and
    # U+0670 as word characters when deciding where a word starts and ends.
    word_char = r"[\w\u064B-\u065F\u0670]"
    word_start = rf"(?<!{word_char})"
    word_end = rf"(?!{word_char})"

    for wrong, fixed in fixes_dict.items():
        if not wrong:
            # An empty key would match at every word edge and insert
            # `fixed` all over the text.
            continue

        # Escape the key so punctuation in it is matched literally and
        # cannot add capture groups that would shift group(1)/group(2).
        pattern = re.compile(
            f"{word_start}{prefixes}{re.escape(wrong)}{suffixes}{word_end}"
        )

        def replacer(m, fixed=fixed):
            prefix = m.group(1) or ""
            suffix = m.group(2) or ""
            return prefix + fixed + suffix

        text = pattern.sub(replacer, text)
    return text
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment