import math
import re
from typing import List

# Maps words which might lead to inaccurate translation to the replacement
# that replace_english_words() substitutes for them.
DICTIONARY = {
    'violated': 'raped',
    'semen': 'cum',
}


def replace_english_words(text: str) -> str:
    """Replace every word of *text* that is a key of ``DICTIONARY``.

    The text is split on whitespace, each token is looked up verbatim in
    ``DICTIONARY`` and the tokens are re-joined with single spaces.

    NOTE: because of the split/join round trip, runs of whitespace collapse
    to one space and leading/trailing whitespace is dropped.  Matching is
    exact, so a token with attached punctuation or different casing is left
    untouched.

    Args:
        text: The text to process.

    Returns:
        The text with matching words replaced.
    """
    return ' '.join(DICTIONARY.get(word, word) for word in text.split())


def remove_ass_statements(text: str) -> str:
    """Strip every ``{...}`` group (ASS-style override block) from *text*.

    Args:
        text: A subtitle line that may contain brace-delimited statements.

    Returns:
        The text with each brace pair and its contents removed.  Text
        around the braces is kept as-is, and an unmatched brace is left
        alone.
    """
    # Non-greedy so that each opening brace pairs with the nearest closing
    # brace instead of swallowing the text between two separate groups.
    return re.sub(r'\{.*?\}', '', text)


def remove_new_line_statements(text: str) -> str:
    """Replace each literal backslash-N sequence in *text* with a space.

    Args:
        text: A subtitle line that may contain the two-character marker
            made of a backslash followed by an upper-case ``N``.

    Returns:
        The text with every such marker turned into a single space.
    """
    return text.replace("\\N", " ")


def split_sentence(sentence: str, n: int, translated_sentence: str) -> List[str]:
    """Split *translated_sentence* into chunks mirroring the original.

    When the original and the translation contain the same non-zero number
    of commas, the translation is split at its commas.  Otherwise its words
    are grouped into consecutive chunks of ``ceil(word_count / n)`` words.

    Args:
        sentence: The original sentence; only its comma count is used.
        n: Desired number of chunks for the word-based fallback.  Values
            below 1 are treated as 1.  Ignored when splitting by commas.
        translated_sentence: The sentence to split.

    Returns:
        The list of chunks.  The word-based fallback may yield fewer than
        *n* chunks (the chunk size is rounded up) and yields an empty list
        when the translation contains no words.
    """
    original_commas = sentence.count(',')
    translated_commas = translated_sentence.count(',')

    # Split by commas if commas align
    if original_commas != 0 and original_commas == translated_commas:
        return [chunk.strip() for chunk in translated_sentence.split(',')]

    # Otherwise, split into n roughly equal chunks
    words = translated_sentence.split()
    if not words:
        # Nothing to distribute; also avoids a zero step in range() below.
        return []

    # Clamp n so that a zero or negative value cannot cause a division by
    # zero or a non-positive chunk size.
    chunk_size = math.ceil(len(words) / max(n, 1))
    return [
        ' '.join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]