Advertisement
gandalfbialy

Untitled

May 17th, 2025
632
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.13 KB | None | 0 0
  1. import json, random, math
  2.  
  3. def load_data(path):
  4.   with open(path, encoding="utf-8") as file:
  5.     return json.load(file)
  6.  
  7. def train_test_split(data, test_ratio=0.2):
  8.   random.shuffle(data)
  9.   cut = int(len(data) * (1-test_ratio))
  10.   print(f"Uczący zbiór: {len(data[:cut])}")
  11.   print(f"Testowy zbiór: {len(data[cut:])}")
  12.   return data[:cut], data[cut:]
  13.  
  14. def build_dictionary(train):
  15.   vocabulary = set()
  16.   for record in train:
  17.     vocabulary.update(record["tags"])
  18.   print(vocabulary)
  19.   return vocabulary
  20.  
  21. def train_nb(train, vocabulary):
  22.   class_counts = {}
  23.   word_counts = {}
  24.   total_words = {}
  25.  
  26.   for record in train:
  27.     c = record["label"]
  28.     class_counts[c] = class_counts.get(c, 0) + 1
  29.     word_counts.setdefault(c, {})
  30.     total_words.setdefault(c, 0)
  31.  
  32.     for tag in record["tags"]:
  33.       word_counts[c][tag] = word_counts[c].get(tag, 0) + 1
  34.       total_words[c] += 1
  35.  
  36.   model = {
  37.       "class_counts": class_counts,
  38.       "word_counts": word_counts,
  39.       "total_words": total_words,
  40.       "vocabulary": vocabulary,
  41.       "alpha": 1.0,
  42.       "total_docs": len(train)
  43.   }
  44.  
  45.   print(model)
  46.   return model
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement