Advertisement
tills

testDAta

May 2nd, 2025
207
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 8.23 KB | None | 0 0
  /**
   * Builds the test fixture for the `lrx_panel` table and registers it as a
   * temporary view.
   *
   * The panel consists of two shops, ids 1 and 2 (one "good" and one "bad").
   *
   * @param spark active session used to build the DataFrame and register the view
   */
  def generatePanelData(spark: SparkSession): Unit = {
    import spark.implicits._

    // Two shops: one good, one bad.
    val shopIds = Seq(1, 2)

    // Expose the shops to SQL under the name lrx_panel.
    shopIds.toDF("shop_id").createOrReplaceTempView("lrx_panel")

    println("Tabela lrx_panel utworzona z 2 sklepami")
  }
  15.  
  /**
   * Builds the test fixture for the `dim_pack` table and registers it as a
   * temporary view.
   *
   * The fixture holds three packs: ids 1 and 2 with basket code "RX" and id 3
   * with basket code "OTC".
   *
   * @param spark active session used to build the DataFrame and register the view
   */
  def generateDimPackData(spark: SparkSession): Unit = {
    import spark.implicits._

    val columnNames = Seq("pack_id", "basket_4_code", "day_id_from", "day_id_to")

    // RX and OTC packs; pack_id is an Int.
    // NOTE(review): day_id_from / day_id_to look like yyyyMMdd integers - confirm.
    val packs = Seq(
      (1, "RX", 20210101, 20211231),
      (2, "RX", 20210101, 20211231),
      (3, "OTC", 20210101, 20211231)
    )

    // Expose the packs to SQL under the name dim_pack.
    packs.toDF(columnNames: _*).createOrReplaceTempView("dim_pack")

    println("Tabela dim_pack utworzona z 3 produktami")
  }
  36.  
  /**
   * Generates the sales test data and returns it as a DataFrame.
   *
   * Columns: `shop_id`, `month_id`, `patient_id`, `trn_id`, `pack_id`, `day_id`,
   * `prescriber_id`. `prescriber_id` is nullable; in this fixture it is empty
   * exactly for the rows with `pack_id` 3.
   *
   * Shop 1 (the "good" shop) has the same 20 transactions in each of the months
   * 202101-202104. Shop 2 (the "bad" shop) swings between 30, 5 and 25
   * transactions in 202101-202103 and deliberately has no rows for 202104.
   *
   * `prescriber_id` is modelled as `Option[Int]`. Mixing `Int` and `null` in the
   * tuples makes Scala infer `Any` for that position, and Spark cannot derive a
   * schema for `Any`, so building the DataFrame would fail at runtime.
   *
   * @param spark active session used to build the DataFrame
   * @return DataFrame with one row per generated transaction (175 rows)
   */
  def generateSalesData(spark: SparkSession): DataFrame = {
    import spark.implicits._

    // Expands per-patient purchases (pack_id, day of month, prescriber_id) into
    // full sales rows for one shop and one month of 2021. Patient and transaction
    // ids are consecutive, starting at the given first ids.
    def monthRows(
        shopId: Int,
        month: Int,
        firstPatientId: Int,
        firstTrnId: Int,
        purchases: Seq[(Int, Int, Option[Int])]
    ): Seq[(Int, Int, Int, Int, Int, Int, Option[Int])] = {
      val monthId = 202100 + month
      purchases.zipWithIndex.map { case ((packId, dayOfMonth, prescriberId), offset) =>
        (
          shopId,
          monthId,
          firstPatientId + offset,
          firstTrnId + offset,
          packId,
          monthId * 100 + dayOfMonth, // e.g. 202101 * 100 + 15 = 20210115
          prescriberId
        )
      }
    }

    // Good shop (shop_id = 1): 20 patients per month, the same purchases every month.
    val goodShopPurchases: Seq[(Int, Int, Option[Int])] = Seq(
      // patients 1-10, day 15
      (1, 15, Some(1)), (1, 15, Some(1)), (1, 15, Some(1)), (1, 15, Some(2)), (1, 15, Some(2)),
      (2, 15, Some(2)), (2, 15, Some(3)), (2, 15, Some(3)), (2, 15, Some(3)), (2, 15, Some(1)),
      // patients 11-20, day 16 (pack 3 has no prescriber)
      (3, 16, None), (3, 16, None), (3, 16, None), (1, 16, Some(2)), (1, 16, Some(2)),
      (1, 16, Some(3)), (1, 16, Some(3)), (2, 16, Some(1)), (2, 16, Some(1)), (2, 16, Some(2))
    )

    // Months 202101-202104; transaction ids are 101-120, 201-220, 301-320, 401-420.
    val goodShopData = (1 to 4).flatMap { month =>
      monthRows(1, month, 1, month * 100 + 1, goodShopPurchases)
    }

    // Bad shop (shop_id = 2): large swings and no data in the last month.
    val badShopPurchases: Seq[(Int, Int, Option[Int])] = Seq(
      // patients 101-110, day 15
      (1, 15, Some(1)), (1, 15, Some(1)), (1, 15, Some(1)), (1, 15, Some(2)), (1, 15, Some(2)),
      (1, 15, Some(3)), (1, 15, Some(1)), (1, 15, Some(2)), (1, 15, Some(3)), (1, 15, Some(1)),
      // patients 111-120, day 16
      (1, 16, Some(2)), (1, 16, Some(3)), (1, 16, Some(1)), (1, 16, Some(2)), (1, 16, Some(3)),
      (2, 16, Some(1)), (2, 16, Some(2)), (2, 16, Some(3)), (2, 16, Some(1)), (2, 16, Some(2)),
      // patients 121-130, day 17 (pack 3 has no prescriber)
      (3, 17, None), (3, 17, None), (3, 17, None), (3, 17, None), (3, 17, None),
      (2, 17, Some(3)), (2, 17, Some(1)), (2, 17, Some(2)), (2, 17, Some(3)), (2, 17, Some(1))
    )

    // Month 2 of the bad shop: only 5 patients, pack 3 only, no prescriber.
    val badShopQuietMonth: Seq[(Int, Int, Option[Int])] =
      Seq.fill(5)((3, 15, Option.empty[Int]))

    val badShopData =
      // month 1 - 202101 - 30 patients
      monthRows(2, 1, 101, 1001, badShopPurchases) ++
        // month 2 - 202102 - 5 patients
        monthRows(2, 2, 101, 2001, badShopQuietMonth) ++
        // month 3 - 202103 - 25 patients: the first 25 purchases of month 1
        monthRows(2, 3, 101, 3001, badShopPurchases.take(25))
    // month 4 - 202104 - intentionally no data (rule violation)

    // Combine both shops and build the DataFrame.
    val allData = goodShopData ++ badShopData
    val salesDF = allData.toDF(
      "shop_id", "month_id", "patient_id", "trn_id", "pack_id", "day_id", "prescriber_id"
    )

    println(s"Wygenerowano dane sprzedaży z ${salesDF.count()} rekordami")

    salesDF
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement