Tetris Reward Function

thewindmage420

Jan 23rd, 2025 (edited)

117

Never

Add comment

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

Python 3.90 KB | None | 0 0

raw download clone embed print report

def get_reward_and_next_state(
    agent,
    grid,
    lines_cleared,
    total_lines_cleared,
    game_over,
    rotations=0,
    moved_horizontally=False,
    moved_down=False,
    locked=False,
    tetrimino_shape=None,
    x=0,
    y=0,
    score=0,
    pieces_placed=0,
    level=1,
    device=None,
):
    """Compute the shaped reward for one step and build the next state tensor.

    The reward is the sum of these terms:

    * a constant step penalty of -1;
    * a line-clear reward (1/2/3/4 lines -> 1000/3000/7000/14000, anything
      else -> 0);
    * two separate penalties when ``rotations > 3`` (``rotations * 5`` and
      ``(rotations - 3) * 5``);
    * when ``locked`` is truthy: penalties proportional to the board's max
      height, hole count and bumpiness, an extra -500 if the max height
      reaches 75% of ``GRID_HEIGHT``, a +20/+40 bonus if the placement did not
      worsen / strictly improved the board, a bonus for nearly full rows, and
      a flat -5 locking penalty;
    * -3000 when ``game_over`` is truthy.

    Args:
        agent: Accepted for interface compatibility; not used here.
        grid: Board as a sequence of rows; a cell is "filled" when ``!= 0``.
        lines_cleared: Number of lines cleared on this step.
        total_lines_cleared: Accepted for interface compatibility; not used.
        game_over: Truthy when the game ended on this step.
        rotations: Rotation count; only values above 3 affect the reward.
        moved_horizontally: Accepted for interface compatibility; not used.
        moved_down: Accepted for interface compatibility; not used.
        locked: Truthy enables the board-quality (placement) terms.
        tetrimino_shape: 2-D iterable of truthy/falsy cells for the locked
            piece. May be ``None``, in which case the before/after
            improvement bonus is skipped.
        x: Column offset of the shape's first column inside ``grid``.
        y: Row offset of the shape's first row inside ``grid``.
        score: Forwarded to ``construct_state_tensor``.
        pieces_placed: Forwarded to ``construct_state_tensor``.
        level: Forwarded to ``construct_state_tensor``.
        device: Forwarded to ``construct_state_tensor``.

    Returns:
        A ``(reward, next_state)`` tuple, where ``next_state`` is whatever
        ``construct_state_tensor`` returns.
    """
    # 0) Step penalty: small negative on every call to avoid stalling.
    reward = -1

    # 1) Line-clear reward; counts outside 1..4 earn nothing.
    line_clear_reward_map = {1: 1000, 2: 3000, 3: 7000, 4: 14000}
    line_clear_points = line_clear_reward_map.get(lines_cleared, 0)
    reward += line_clear_points
    if lines_cleared > 0:
        print(f"[DEBUG] Lines cleared: {lines_cleared} -> +{line_clear_points}")

    # 2) First rotation term. Despite the "bonus" name the multiplier is
    #    negative, so this is a penalty of 5 per rotation once rotations > 3.
    # NOTE(review): section 8 below penalizes the same condition again, and
    # the debug line prints "+-N" -- confirm that the double penalty is
    # intended before changing either value.
    if rotations > 3:
        rotation_bonus = rotations * -5
        reward += rotation_bonus
        print(f"[DEBUG] Rotations performed: {rotations} -> +{rotation_bonus} reward")

    if locked:
        # Board metrics after the piece has been placed. They are only used
        # by the lock-time terms, so they are not computed on other steps.
        after_holes = count_holes(grid)
        after_max_height, after_col_heights = get_max_height_and_column_heights(grid)
        after_bumpiness = get_bumpiness(after_col_heights)

        # 3) Large penalties for height, holes and bumpiness.
        height_penalty = 60.0
        hole_penalty = 40.0
        bump_penalty = 30.0
        reward -= after_max_height * height_penalty
        reward -= after_holes * hole_penalty
        reward -= after_bumpiness * bump_penalty

        # 4) Harsh penalty once the tallest column reaches 75% of the board.
        if after_max_height >= 0.75 * GRID_HEIGHT:
            reward -= 500
            print("[DEBUG] Tall column penalty: -500")

        # 5) Reward "improvements" only if the board did not worsen.
        #    Without the piece shape the pre-lock board cannot be
        #    reconstructed, so the bonus is skipped instead of crashing on
        #    enumerate(None) or guessing a "before" state.
        if tetrimino_shape is not None:
            # NOTE(review): if lines were cleared on this step, (x, y) may no
            # longer address the piece's cells in ``grid`` -- confirm with
            # the caller whether ``grid`` is the pre- or post-clear board.
            grid_without = _grid_without_piece(grid, tetrimino_shape, x, y)
            before_holes = count_holes(grid_without)
            before_max_height, before_col_heights = get_max_height_and_column_heights(grid_without)
            before_bumpiness = get_bumpiness(before_col_heights)

            board_not_worse = (
                after_holes <= before_holes
                and after_max_height <= before_max_height
                and after_bumpiness <= before_bumpiness
            )
            if board_not_worse:
                improvement_bonus = 20
                strictly_better = (
                    after_holes < before_holes
                    or after_max_height < before_max_height
                    or after_bumpiness < before_bumpiness
                )
                if strictly_better:
                    improvement_bonus += 20
                reward += improvement_bonus
                print(f"[DEBUG] Board not worse => +{improvement_bonus} improvement bonus")

        # 6) Partial-fill bonus for rows that are nearly complete.
        partial_fill = _partial_fill_bonus(grid)
        if partial_fill > 0:
            reward += partial_fill
            print(f"[DEBUG] Partial fill bonus => +{partial_fill}")

        # 7) Extra flat penalty for locking.
        reward -= 5

    # 8) Second rotation term: 5 per rotation beyond the third.
    if rotations > 3:
        penalty = (rotations - 3) * 5
        reward -= penalty
        print(f"[DEBUG] Rotation penalty => -{penalty}")

    # 9) Game-over penalty.
    if game_over:
        reward -= 3000
        print("[DEBUG] Game over penalty => -3000")

    # Build the next state (the original comment says "5 channels"; the
    # layout is defined by construct_state_tensor, not visible here).
    next_state = construct_state_tensor(grid, score, lines_cleared, pieces_placed, level, device)
    return reward, next_state


def _grid_without_piece(grid, tetrimino_shape, x, y):
    """Return a row-wise copy of *grid* with the piece's cells set to 0."""
    # NOTE(review): row[:] is an independent copy for list rows; if rows are
    # NumPy arrays it is a view and the writes below would alias ``grid`` --
    # confirm the grid type.
    grid_without = [row[:] for row in grid]
    for i, row_block in enumerate(tetrimino_shape):
        for j, block in enumerate(row_block):
            # Ignore empty shape cells and cells that fall outside the board.
            if block and 0 <= (y + i) < GRID_HEIGHT and 0 <= (x + j) < GRID_WIDTH:
                grid_without[y + i][x + j] = 0
    return grid_without


def _partial_fill_bonus(grid):
    """Return 5 points per filled cell beyond ``GRID_WIDTH - 2`` in each row."""
    threshold = GRID_WIDTH - 2
    bonus = 0
    for row in grid:
        fill_count = sum(1 for cell in row if cell != 0)
        if fill_count >= threshold:
            bonus += (fill_count - threshold) * 5
    return bonus

Add Comment

Please, Sign In to add comment