@@ -107,42 +107,40 @@ def prepare_draft_tokens(self,
107107 assert drafter .should_use_spec_decode (active_requests ,
108108 max_batch_size = 8 ,
109109 max_num_tokens = 4096 * 8 ,
110- max_draft_len = 4 ) is True
110+ max_draft_len = 4 )
111111
112112 # Small batch size ON case: num_effective_requests = min(12, 5, very_large) = 5 <= 6 → True
113113 active_requests = [object ()] * 12
114114 assert drafter .should_use_spec_decode (active_requests ,
115115 max_batch_size = 5 ,
116116 max_num_tokens = 4096 * 8 ,
117- max_draft_len = 4 ) is True
117+ max_draft_len = 4 )
118118
119119 # Small token budget ON case: token_cap = 28 // (1+4) = 5 → min(8, 12, 5) = 5 <= 6 → True
120120 active_requests = [object ()] * 12
121121 assert drafter .should_use_spec_decode (active_requests ,
122122 max_batch_size = 8 ,
123123 max_num_tokens = 28 ,
124- max_draft_len = 4 ) is True
124+ max_draft_len = 4 )
125125
126126 # Generic OFF case: num_effective_requests = min(12, 8, very_large) = 8 > 6 → False
127127 active_requests = [object ()] * 12
128- assert drafter .should_use_spec_decode (active_requests ,
129- max_batch_size = 8 ,
130- max_num_tokens = 4096 * 8 ,
131- max_draft_len = 4 ) is False
128+ assert not drafter .should_use_spec_decode (active_requests ,
129+ max_batch_size = 8 ,
130+ max_num_tokens = 4096 * 8 ,
131+ max_draft_len = 4 )
132132
133133 # Edge case - No active requests (empty list) OFF case
134134 active_requests = []
135- assert drafter .should_use_spec_decode (active_requests ,
136- max_batch_size = 8 ,
137- max_num_tokens = 4096 * 8 ,
138- max_draft_len = 4 ) is False
135+ assert not drafter .should_use_spec_decode (active_requests ,
136+ max_batch_size = 8 ,
137+ max_num_tokens = 4096 * 8 ,
138+ max_draft_len = 4 )
139139
140140 # Edge case - Token cap equals 0 OFF case: token_cap = 4 // (1+4) = 0 → min(12, 8, 0) = 0 effective requests (none fit the token budget, so OFF even though 0 <= 6) → False
141141 active_requests = [object ()] * 12
142- assert drafter .should_use_spec_decode (active_requests ,
143- max_batch_size = 8 ,
144- max_num_tokens = 4 ,
145- max_draft_len = 4 ) is False
142+ assert not drafter .should_use_spec_decode (
143+ active_requests , max_batch_size = 8 , max_num_tokens = 4 , max_draft_len = 4 )
146144
147145
148146if __name__ == "__main__" :
0 commit comments