Advertisement
tills

wyodrebnienie call_id

Jan 8th, 2025
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.17 KB | None | 0 0
  1. def extract_call_segments(full_id):
  2.     """
  3.    Extracts the first three underscore-separated segments from a call ID.
  4.    For example:
  5.    Input: "0240828_31657_2958059406I0L412_20240828_084737_123427680"
  6.    Output: "0240828_31657_2958059406I0L412"
  7.  
  8.    Args:
  9.        full_id (str): The complete call ID string
  10.  
  11.    Returns:
  12.        str: The first three segments joined by underscores
  13.    """
  14.     # Split the string by underscores
  15.     segments = full_id.split("_")
  16.  
  17.     # Take only the first three segments and join them back with underscores
  18.     return "_".join(segments[:3])
  19.  
  20.  
  21. def process_transcription(json_content):
  22.     """
  23.    Konwertuje transkrypcję z formatu JSON na sformatowany tekst
  24.    z oznaczeniami mówiącego i czasem. Dodatkowo wyodrębnia call_id
  25.    ze ścieżki źródłowej.
  26.    """
  27.     # Parsowanie JSONa
  28.     data = (
  29.         json.loads(json_content)
  30.         if isinstance(json_content, str)
  31.         else json_content
  32.     )
  33.  
  34.     # Wyodrębnienie call_id ze ścieżki źródłowej
  35.     source_url = data.get("source", "")
  36.     call_id = ""
  37.     if "speechstudiofilename=" in source_url:
  38.         # Znajdujemy część URL po "speechstudiofilename="
  39.         full_id = source_url.split("speechstudiofilename=")[-1]
  40.         # Usuwamy rozszerzenie .wav jeśli występuje
  41.         full_id = full_id.replace(".wav", "")
  42.         # Wyodrębniamy tylko pierwsze trzy segmenty
  43.         call_id = extract_call_segments(full_id)
  44.  
  45.     phrases = []
  46.  
  47.     for phrase in data.get("recognizedPhrases", []):
  48.         channel = phrase.get("channel", 0)
  49.         speaker = "Agent" if channel == 1 else "Klient"
  50.  
  51.         offset_ticks = float(phrase.get("offsetInTicks", 0))
  52.         timestamp = convert_ticks_to_timestamp(offset_ticks)
  53.  
  54.         if phrase.get("nBest") and phrase["nBest"][0].get("display"):
  55.             text = phrase["nBest"][0]["display"].strip()
  56.  
  57.             if text and text != "Napisy stworzone przez społeczność Amara.org":
  58.                 phrases.append((offset_ticks, f"{speaker} {timestamp} {text}"))
  59.  
  60.     phrases.sort(key=lambda x: x[0])
  61.     formatted_text = "\n".join(phrase[1] for phrase in phrases)
  62.  
  63.     return formatted_text, call_id
  64.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement