@article{doi:10.5116/ijme.50c3.2f27,
  author   = {Spielvogel, R. and Stednick, Z. and Beckett, L. and Latimore, D.},
  title    = {Sources of variability in medical student evaluations on the internal medicine clinical rotation},
  journal  = {Int J Med Educ},
  volume   = {3},
  pages    = {245--251},
  year     = {2012},
  doi      = {10.5116/ijme.50c3.2f27},
  url      = {http://www.ijme.net/archive/3/variability-in-medical-student-evaluations/},
  eprint   = {http://www.ijme.net/archive/3/variability-in-medical-student-evaluations.pdf},
  abstract = {Objectives: To explore the sources of variability in evaluator ratings among third year medical students in the Internal Medicine clinical rotation. Also, to examine systematic effects and variability introduced by differences in the various student, evaluator, and evaluation settings. Methods: A multilevel model was used to estimate the amount of between-student, between-rater and rater-student interaction variability present in the students' clinical evaluations in a third year internal medicine clinical rotation. Within this model, linear regression analysis was used to estimate the effect of variables on the students' numerical evaluation scores and the reliability of those scores. Results: A total of 2,747 evaluation surveys were collected from 389 evaluators on 373 students over 4.5 years. All surveys used a nine-point grading scale, and therefore all results are reported on this scale. The calculated between-rater, between-student and rater-student interaction variance components were 0.50, 0.27 and 0.62, respectively. African American/Black students had lower scores than Caucasian students by 0.58 points (t=-3.28; P=0.001). No gender effects were noted. Conclusions: These between-rater and between-student variance components imply that the evaluator plays a larger role in the students' scores than the students themselves. The residual rater-student interaction variance was larger and did not change by accounting for the measured demographic variables. This implies there is significant variability in each rater-student interaction that remains unexplained. This could contribute to unreliability in the system, requiring that students receive between 8 and 17 clinical evaluations to achieve 80% reliability.},
}
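% Note: the abstract's "between 8 and 17 evaluations for 80% reliability" claim
% can be approximately reproduced from the reported variance components using
% standard generalizability-theory formulas. A minimal sketch, assuming the
% usual crossed student-by-rater (s x r) design; the formulas below are the
% textbook G and Phi coefficients, not the paper's own (unshown) computation:
%
% \[
%   E\rho^2(n) = \frac{\sigma^2_s}{\sigma^2_s + \sigma^2_{sr}/n},
%   \qquad
%   \Phi(n) = \frac{\sigma^2_s}{\sigma^2_s + (\sigma^2_r + \sigma^2_{sr})/n}
% \]
%
% With \sigma^2_s = 0.27, \sigma^2_r = 0.50, \sigma^2_{sr} = 0.62, setting
% E\rho^2(n) = 0.80 gives n \approx 0.62 / (0.27 \times 0.25) \approx 9
% (relative decisions), and \Phi(n) = 0.80 gives
% n \approx (0.50 + 0.62) / (0.27 \times 0.25) \approx 17 (absolute
% decisions), roughly matching the reported 8-to-17 range.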