RobertDeMilo

Ядро поисковой системы

Oct 21st, 2023
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.15 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <set>
  4. #include <string>
  5. #include <utility>
  6. #include <vector>
  7.  
  8. using namespace std;
  9.  
  10. string ReadLine() {
  11.     string s;
  12.     getline(cin, s);
  13.     return s;
  14. }
  15.  
  16. int ReadLineWithNumber() {
  17.     int result = 0;
  18.     cin >> result;
  19.     ReadLine();
  20.     return result;
  21. }
  22.  
  23. vector<string> SplitIntoWords(const string& text) {
  24.     vector<string> words;
  25.     string word;
  26.     for (const char c : text) {
  27.         if (c == ' ') {
  28.             if (!word.empty()) {
  29.                 words.push_back(word);
  30.                 word.clear();
  31.             }
  32.         } else {
  33.             word += c;
  34.         }
  35.     }
  36.     if (!word.empty()) {
  37.         words.push_back(word);
  38.     }
  39.  
  40.     return words;
  41. }
  42.  
  43. set<string> ParseStopWords(const string& text) {
  44.     set<string> stop_words;
  45.     for (const string& word : SplitIntoWords(text)) {
  46.         stop_words.insert(word);
  47.     }
  48.     return stop_words;
  49. }
  50.  
  51. vector<string> SplitIntoWordsNoStop(const string& text, const set<string>& stop_words) {
  52.     vector<string> words;
  53.     for (const string& word : SplitIntoWords(text)) {
  54.         if (stop_words.count(word) == 0) {
  55.             words.push_back(word);
  56.         }
  57.     }
  58.     return words;
  59. }
  60.  
  61. void AddDocument(vector<vector<string>>& documents, const set<string>& stop_words,
  62.                  const string& document) {
  63.     const vector<string> words = SplitIntoWordsNoStop(document, stop_words);
  64.     documents.push_back(words);
  65. }
  66.  
  67. set<string> ParseQuery(const string& text, const set<string>& stop_words) {
  68.     set<string> query_words;
  69.     for (const string& word : SplitIntoWordsNoStop(text, stop_words)) {
  70.         query_words.insert(word);
  71.     }
  72.     return query_words;
  73. }
  74.  
  75. bool MatchDocument(const vector<string>& document_words, const set<string>& query_words) {
  76.     for (const string& word : document_words) {
  77.         if (query_words.count(word) != 0) {
  78.             return true;
  79.         }
  80.     }
  81.     return false;
  82. }
  83.  
  84. // Для каждого найденного документа возвращает его id
  85. vector<int> FindDocuments(const vector<vector<string>>& documents, const set<string>& stop_words,
  86.                           const string& query) {
  87.     const set<string> query_words = ParseQuery(query, stop_words);
  88.     vector<int> matched_documents;
  89.     int document_id = 0;
  90.     for (const auto& document : documents) {
  91.         if (MatchDocument(document, query_words)) {
  92.             matched_documents.push_back(document_id);
  93.         }
  94.         ++document_id;
  95.     }
  96.     return matched_documents;
  97. }
  98.  
  99. int main() {
  100.     const string stop_words_joined = ReadLine();
  101.     const set<string> stop_words = ParseStopWords(stop_words_joined);
  102.  
  103.     // Read documents
  104.     vector<vector<string>> documents;
  105.     const int document_count = ReadLineWithNumber();
  106.     for (int document_id = 0; document_id < document_count; ++document_id) {
  107.         AddDocument(documents, stop_words, ReadLine());
  108.     }
  109.  
  110.     const string query = ReadLine();
  111.     for (const int document_id : FindDocuments(documents, stop_words, query)) {
  112.         cout << "{ document_id = "s << document_id << " }"s << endl;
  113.     }
  114. }
Add Comment
Please, Sign In to add comment