Advertisement
RobertDeMilo

Сортировка вектора структур

Oct 17th, 2023
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.60 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <set>
  4. #include <string>
  5. #include <utility>
  6. #include <vector>
  7.  
  8. using namespace std;
  9.  
  // Upper bound on how many documents a search returns.
  constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;
  11.  
  // A stored document: its identifier plus the words kept for it.
  // Stays an aggregate so it can be brace-initialized as { id, words }.
  struct DocumentContent {
      int id{0};
      std::vector<std::string> words{};
  };
  17.  
  // A search hit: document identifier and its relevance for the query.
  // Field order matters: callers brace-initialize it as { id, relevance }.
  struct Document {
      int id{0};
      int relevance{0};
  };
  23.  
  // Reads one line from standard input and returns it without the
  // terminating newline.
  std::string ReadLine() {
      std::string line;
      std::getline(std::cin, line);
      return line;
  }
  29.  
  // Reads an integer from standard input, then consumes the remainder of
  // that line so the next line-based read starts on a fresh line.
  int ReadLineWithNumber() {
      int number = 0;
      std::cin >> number;

      // Discard whatever follows the number up to and including the newline.
      std::string rest_of_line;
      std::getline(std::cin, rest_of_line);

      return number;
  }
  36.  
  // Splits `text` on the space character (' ' only) and returns the
  // non-empty pieces in their original order. Runs of spaces, as well as
  // leading and trailing spaces, produce no empty words.
  std::vector<std::string> SplitIntoWords(const std::string& text) {
      std::vector<std::string> result;

      // Jump to the start of each word, then cut it at the next space.
      std::string::size_type begin = text.find_first_not_of(' ');
      while (begin != std::string::npos) {
          const std::string::size_type end = text.find(' ', begin);
          // When no space follows, `end` is npos and substr takes the tail.
          result.push_back(text.substr(begin, end - begin));
          begin = text.find_first_not_of(' ', end);
      }

      return result;
  }
  57.  
  58. set<string> ParseStopWords(const string& text) {
  59.     set<string> stop_words;
  60.     for (const string& word : SplitIntoWords(text)) {
  61.         stop_words.insert(word);
  62.     }
  63.     return stop_words;
  64. }
  65.  
  66. vector<string> SplitIntoWordsNoStop(const string& text, const set<string>& stop_words) {
  67.     vector<string> words;
  68.     for (const string& word : SplitIntoWords(text)) {
  69.         if (stop_words.count(word) == 0) {
  70.             words.push_back(word);
  71.         }
  72.     }
  73.     return words;
  74. }
  75.  
  76. void AddDocument(vector<DocumentContent>& documents, const set<string>& stop_words, int document_id,
  77.     const string& document) {
  78.     const vector<string> words = SplitIntoWordsNoStop(document, stop_words);
  79.     documents.push_back({ document_id, words });
  80. }
  81.  
  82. set<string> ParseQuery(const string& text, const set<string>& stop_words) {
  83.     set<string> query_words;
  84.     for (const string& word : SplitIntoWordsNoStop(text, stop_words)) {
  85.         query_words.insert(word);
  86.     }
  87.     return query_words;
  88. }
  89.  
  90. int MatchDocument(const DocumentContent& content, const set<string>& query_words) {
  91.     if (query_words.empty()) {
  92.         return 0;
  93.     }
  94.     set<string> matched_words;
  95.     for (const string& word : content.words) {
  96.         if (matched_words.count(word) != 0) {
  97.             continue;
  98.         }
  99.         if (query_words.count(word) != 0) {
  100.             matched_words.insert(word);
  101.         }
  102.     }
  103.     return static_cast<int>(matched_words.size());
  104. }
  105.  
  106. // Для каждого документа возвращает его релевантность и id
  107. /*vector<pair<int, int>> FindAllDocuments(const vector<DocumentContent>& documents,
  108.     const set<string>& query_words) */
  109. vector<Document> FindAllDocuments(const vector<DocumentContent>& documents,
  110.     const set<string>& query_words)
  111. {
  112.     //vector<pair<int, int>> matched_documents;
  113.     vector<Document> matched_documents;
  114.     for (const auto& document : documents) {
  115.         const int relevance = MatchDocument(document, query_words);
  116.         if (relevance > 0) {
  117.             matched_documents.push_back({ document.id, relevance });
  118.         }
  119.     }
  120.     return matched_documents;
  121. }
  122.  
  123. // Возвращает топ-5 самых релевантных документов
  124. //vector<pair<int, int>> FindTopDocuments(const vector<DocumentContent>& documents,
  125. //    const set<string>& stop_words, const string& raw_query)
  126. vector<Document> FindTopDocuments(const vector<DocumentContent>& documents,
  127.     const set<string>& stop_words, const string& raw_query)
  128. {
  129.     const set<string> query_words = ParseQuery(raw_query, stop_words);
  130.     auto matched_documents = FindAllDocuments(documents, query_words);
  131.     // теперь id, relevance
  132.  
  133.     sort(matched_documents.begin(), matched_documents.end(),
  134.         [](const Document& lhs, const Document& rhs) {return lhs.relevance > rhs.relevance; });
  135.  
  136.     if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
  137.         matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
  138.     }
  139.    
  140.     return matched_documents;
  141. }
  142.  
  143. int main() {
  144.     const string stop_words_joined = ReadLine();
  145.     const set<string> stop_words = ParseStopWords(stop_words_joined);
  146.  
  147.     // Read documents
  148.     vector<DocumentContent> documents;
  149.     const int document_count = ReadLineWithNumber();
  150.     for (int document_id = 0; document_id < document_count; ++document_id) {
  151.         AddDocument(documents, stop_words, document_id, ReadLine());
  152.     }
  153.  
  154.     const string query = ReadLine();
  155.     for (auto [document_id, relevance] : FindTopDocuments(documents, stop_words, query)) {
  156.         cout << "{ document_id = "s << document_id << ", relevance = "s << relevance << " }"s
  157.             << endl;
  158.     }
  159. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement