Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def extract_call_segments(full_id):
    """Return the first three underscore-separated segments of a call ID.

    Example:
        Input:  "0240828_31657_2958059406I0L412_20240828_084737_123427680"
        Output: "0240828_31657_2958059406I0L412"

    Args:
        full_id (str): The complete call ID string.

    Returns:
        str: The first three segments joined by underscores. An ID with
        fewer than three segments is returned unchanged.
    """
    # Everything after the third separator is discarded, so there is no need
    # to split any further than that.
    leading_segments = full_id.split("_", 3)[:3]
    return "_".join(leading_segments)
def process_transcription(json_content):
    """Convert a JSON transcription into speaker- and time-labelled text.

    Every recognized phrase that carries non-empty display text becomes one
    line of the form ``"<speaker> <timestamp> <text>"``. Lines are ordered by
    the phrase's ``offsetInTicks`` value. In addition, the call ID is derived
    from the part of the ``source`` URL that follows
    ``speechstudiofilename=``.

    Args:
        json_content (str | dict): The transcription, either as a JSON string
            or as an already-parsed mapping.

    Returns:
        tuple[str, str]: ``(formatted_text, call_id)``. ``call_id`` is an
        empty string when ``source`` is missing, null, or does not contain
        the ``speechstudiofilename=`` marker.
    """
    # Accept both raw JSON text and an already-decoded object.
    data = json.loads(json_content) if isinstance(json_content, str) else json_content

    # `or ""` also covers an explicit JSON null, which the default argument
    # of dict.get() does not (it only applies when the key is absent).
    source_url = data.get("source") or ""
    call_id = ""
    marker = "speechstudiofilename="
    if marker in source_url:
        # Keep what follows the marker, drop the ".wav" extension, then
        # reduce the ID to its first three segments.
        full_id = source_url.split(marker)[-1].replace(".wav", "")
        call_id = extract_call_segments(full_id)

    phrases = []
    for phrase in data.get("recognizedPhrases") or []:
        candidates = phrase.get("nBest")
        if not candidates:
            continue
        text = (candidates[0].get("display") or "").strip()
        # NOTE(review): the literal below looks like a spurious caption line
        # emitted by the recognizer rather than real speech - confirm.
        if not text or text == "Napisy stworzone przez społeczność Amara.org":
            continue

        # Only phrases that are actually kept need a speaker and timestamp.
        speaker = "Agent" if phrase.get("channel", 0) == 1 else "Klient"
        offset_ticks = float(phrase.get("offsetInTicks", 0))
        timestamp = convert_ticks_to_timestamp(offset_ticks)
        phrases.append((offset_ticks, f"{speaker} {timestamp} {text}"))

    # Order by offset only; the sort is stable, so phrases with equal
    # offsets keep their input order.
    phrases.sort(key=lambda item: item[0])
    formatted_text = "\n".join(line for _, line in phrases)
    return formatted_text, call_id
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement