Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def get_reward_and_next_state(
    agent,
    grid,
    lines_cleared,
    total_lines_cleared,
    game_over,
    rotations=0,
    moved_horizontally=False,
    moved_down=False,
    locked=False,
    tetrimino_shape=None,
    x=0,
    y=0,
    score=0,
    pieces_placed=0,
    level=1,
    device=None
):
    """
    Compute the shaped reward for one Tetris step and build the next state.

    The reward starts from a small per-step penalty, adds a bonus for cleared
    lines, penalises excessive rotations and, when a piece has just locked,
    penalises stack height, holes and bumpiness while granting small bonuses
    for nearly-full rows and for not making the board worse.

    Args:
        agent: Unused by this function; kept for caller compatibility.
        grid: Board as a sequence of rows (each row sliceable and iterable);
            a cell value of 0 is treated as empty.
        lines_cleared: Lines cleared this step; 1-4 map to a bonus, any other
            value gives no bonus.
        total_lines_cleared: Unused by this function.
        game_over: When truthy, a 3000-point penalty is applied.
        rotations: Rotation count; values above 3 are penalised.
        moved_horizontally: Unused by this function.
        moved_down: Unused by this function.
        locked: When truthy, the board-shape terms (sections 3-8) are applied.
        tetrimino_shape: 2-D iterable of truthy/falsy cells for the piece that
            just locked. If None, the before/after comparison (section 5) is
            skipped because the pre-lock board cannot be reconstructed.
        x: Column offset of the piece's top-left cell in ``grid``.
        y: Row offset of the piece's top-left cell in ``grid``.
        score, pieces_placed, level, device: Forwarded unchanged to
            ``construct_state_tensor``.

    Returns:
        A ``(reward, next_state)`` tuple, where ``next_state`` is whatever
        ``construct_state_tensor`` returns for the current board.
    """
    # NOTE(review): the original indentation of this function was lost.
    # Sections 3-5 must live under `if locked:` (section 5 reads the
    # `before_*` values defined there) and 6-7 follow them; placing section 8
    # inside and section 9 outside the `if locked:` block is an assumption --
    # confirm against the intended behaviour.

    # 0) Step penalty: small negative each frame to avoid stalling
    reward = -1

    # 1) LINE CLEAR REWARD
    line_clear_reward_map = {1: 1000, 2: 3000, 3: 7000, 4: 14000}
    line_clear_points = line_clear_reward_map.get(lines_cleared, 0)
    reward += line_clear_points
    if lines_cleared > 0:
        print(f"[DEBUG] Lines cleared: {lines_cleared} -> +{line_clear_points}")

    # 2) ROTATION ADJUSTMENT: the multiplier is negative, so this is a
    #    penalty once more than 3 rotations have been performed.
    if rotations > 3:
        rotation_bonus = rotations * -5  # Adjust the multiplier as needed
        reward += rotation_bonus
        # Print the signed value instead of the misleading "+-N".
        print(f"[DEBUG] Rotations performed: {rotations} -> {rotation_bonus:+} reward")

    # Board metrics after the state update
    after_holes = count_holes(grid)
    after_max_height, after_col_heights = get_max_height_and_column_heights(grid)
    after_bumpiness = get_bumpiness(after_col_heights)

    if locked:
        # Without the piece shape the pre-lock board cannot be rebuilt, so the
        # before/after comparison is skipped rather than raising TypeError.
        have_before = tetrimino_shape is not None
        if have_before:
            # Remove the locked piece from a copy of the grid to measure the
            # board as it was before the lock.
            # NOTE(review): if lines were cleared this step the rows may have
            # shifted, so (x, y) may no longer address the piece -- confirm.
            grid_without = [row[:] for row in grid]
            for i, row_block in enumerate(tetrimino_shape):
                for j, block in enumerate(row_block):
                    if block and 0 <= (y + i) < GRID_HEIGHT and 0 <= (x + j) < GRID_WIDTH:
                        grid_without[y + i][x + j] = 0
            before_holes = count_holes(grid_without)
            before_max_height, before_col_heights = get_max_height_and_column_heights(grid_without)
            before_bumpiness = get_bumpiness(before_col_heights)

        # 3) LARGE PENALTIES FOR HEIGHT, HOLES, BUMPINESS
        height_penalty = 60.0
        hole_penalty = 40.0
        bump_penalty = 30.0
        reward -= after_max_height * height_penalty
        reward -= after_holes * hole_penalty
        reward -= after_bumpiness * bump_penalty

        # 4) HARSH PENALTY IF ANY COLUMN ABOVE 75%
        if after_max_height >= 0.75 * GRID_HEIGHT:
            reward -= 500
            print("[DEBUG] Tall column penalty: -500")

        # 5) REWARD "IMPROVEMENTS" ONLY IF THE BOARD DIDN'T WORSEN
        if have_before:
            board_not_worse = (
                after_holes <= before_holes and
                after_max_height <= before_max_height and
                after_bumpiness <= before_bumpiness
            )
            if board_not_worse:
                improvement_bonus = 20
                # Extra bonus when at least one metric strictly improved.
                if (
                    after_holes < before_holes
                    or after_max_height < before_max_height
                    or after_bumpiness < before_bumpiness
                ):
                    improvement_bonus += 20
                reward += improvement_bonus
                print(f"[DEBUG] Board not worse => +{improvement_bonus} improvement bonus")

        # 6) Partial fill bonus: +5 per filled cell beyond GRID_WIDTH - 2
        partial_fill = 0
        for row in grid:
            fill_count = sum(1 for cell in row if cell != 0)
            if fill_count >= GRID_WIDTH - 2:
                partial_fill += (fill_count - (GRID_WIDTH - 2)) * 5
        if partial_fill > 0:
            reward += partial_fill
            print(f"[DEBUG] Partial fill bonus => +{partial_fill}")

        # 7) Extra penalty for locking
        reward -= 5

        # 8) Rotation penalty (applies in addition to section 2)
        if rotations > 3:
            penalty = (rotations - 3) * 5
            reward -= penalty
            print(f"[DEBUG] Rotation penalty => -{penalty}")

    # 9) GAME OVER PENALTY
    if game_over:
        reward -= 3000
        print("[DEBUG] Game over penalty => -3000")

    # Build the next state from the current board and game statistics
    next_state = construct_state_tensor(grid, score, lines_cleared, pieces_placed, level, device)
    return reward, next_state
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement