Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def train_nb(train, vocabulary, alpha=1.0):
    """Collect per-class count statistics from labelled training records.

    The function only gathers counts; it performs no probability
    computation itself.

    Args:
        train: Iterable of records. Each record must support
            ``record["label"]`` (a hashable class label) and
            ``record["tags"]`` (an iterable of hashable tags). Any
            iterable works, including generators, because it is
            traversed exactly once.
        vocabulary: Stored unchanged (by reference) under ``"vocab"``.
        alpha: Value stored under ``"alpha"``; presumably the additive
            smoothing constant used at prediction time (confirm against
            the consumer of the model). Defaults to ``1.0``.

    Returns:
        A dict with the keys:

        * ``"class_counts"``: label -> number of records with that label.
        * ``"word_counts"``: label -> {tag -> occurrences in that class}.
        * ``"total_words"``: label -> total tag occurrences in that class.
        * ``"vocab"``: the ``vocabulary`` argument.
        * ``"alpha"``: the ``alpha`` argument.
        * ``"total_docs"``: number of records seen.

    Raises:
        KeyError: If a record lacks the ``"label"`` or ``"tags"`` key.
    """
    class_counts = {}
    word_counts = {}
    total_words = {}
    # Counted during the pass instead of via len(train) so that one-shot
    # iterables (generators, file-backed readers) are supported too.
    total_docs = 0

    for record in train:
        label = record["label"]
        total_docs += 1
        class_counts[label] = class_counts.get(label, 0) + 1

        # Register the class even when the record carries no tags, so every
        # seen label has an entry in both word_counts and total_words.
        label_words = word_counts.setdefault(label, {})
        total_words.setdefault(label, 0)

        for tag in record["tags"]:
            label_words[tag] = label_words.get(tag, 0) + 1
            total_words[label] += 1

    model = {
        "class_counts": class_counts,
        "word_counts": word_counts,
        "total_words": total_words,
        "vocab": vocabulary,
        "alpha": alpha,
        "total_docs": total_docs,
    }
    # Kept from the original: the trained model is echoed to stdout.
    print(model)
    return model
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement