/**************************************************************************
Code for "Own Type Bias"
Created in: May 2020
Created by: Zeeshan Samad
Contact: zeeshan.samad@vanderbilt.edu

-----------------
Table of Contents
-----------------
0. Create data set
    > Import raw data from redcap
    > rename/label variables
    > export excel file to pay mturk worker bonuses
    > create final data set for analysis
1. Figures
    > Fig 3: actual distribution
    > Fig 4: pie chart of subject types
    > Fig 5: guesses made by low/medium/high types
    > Fig 6: focal points 0, 5, 10
    > Fig 7: predicted distribution of types, by low/medium/high types
    > Fig 8 (appendix): predicted distributions by own donation amount
2. Tables
    > Table 1: Summary Statistics
    > Table 2: Prediction Accuracy
**************************************************************************/

/**********************
* 0. Create Data set
**********************/

// Folders holding the MTurk batch files and the experiment results
global input_mturk C:\Users\Zeeshan\Box Sync\Own type bias\Experiment\MTurk batch files
global input C:\Users\Zeeshan\Box Sync\Own type bias\Experiment\Results

// Step 1: prepare the MTurk file (this is not the main dataset)
import delimited "$input_mturk\mturk_batch_files.txt", clear
/* note: To create this file, first download all mturk batch files in a folder,
   in cmd go to that folder
   (e.g. cd "C:\Users\Zeeshan\Box Sync\Own type bias\Experiment\MTurk batch files")
   then type the following:
       copy *.csv mturk_batch_files.txt
*/

// Completion code the worker typed into the HIT, with the first match of
// the pattern "HITId (leading double quote included) removed
gen mturk_code = regexr(answersurveycode, `""HITId"' ,"")
label var mturk_code "Code sent back BY subject"

// Batch ID: first 15 characters of the requester annotation, minus the
// "BatchId:" prefix, converted to a number
gen BatchId = regexr(ustrleft(requesterannotation, 15), "BatchId:", "")
destring BatchId, replace
label var BatchId "MTurk Batch ID"

// Session number: consecutive group number per batch, then explicit
// overrides for four batches (each override keys on a distinct BatchId)
sort BatchId
egen session = group(BatchId)
replace session = 3 if BatchId == 3985979
replace session = 4 if BatchId == 3988469
replace session = 5 if BatchId == 3988478
replace session = 6 if BatchId == 3985977
label var session "MTurk session no."
// Finish the MTurk file: friendlier names for worker id and time on task
rename (workerid worktimeinseconds) (mturk_id time_taken)
label var time_taken "Time (seconds) taken to complete HIT on MTurk"

// Indicator for an approved HIT (note: not part of the keep list below)
gen particip_fee_paid = assignmentstatus == "Approved"
label var particip_fee_paid "If HIT approved on MTurk"

keep mturk_id time_taken mturk_code session

// save in stata format so it can be merged into main file
save "$input\mturk_data.dta", replace


// Step 2: create the main data set
import delimited "$input\OwnTypeBias_DATA.csv", clear
/* note: this file comes from redcap using the following steps:
   data exports > all data > export data > csv(raw) > export&overwrite */

// Clean the worker id: strip the first "COPIED" and then the first "_"
replace mturk_id = regexr(regexr(mturk_id, "COPIED", ""), "_", "")

// Remove duplicate worker ids, discarding the copies with task 2 incomplete
tempvar n_copies
duplicates tag mturk_id, generate(`n_copies')
drop if `n_copies' > 0 & task_2_complete == 0   // 17 obs deleted
drop `n_copies'

// bring in mturk related data
merge 1:1 mturk_id using "$input\mturk_data.dta"
// should not end up with any obs with _merge=2

// Drop invalid subjects (kept as separate steps so the log shows each count)
drop if _merge==1 & passed==0            // 138 obs dropped. these guys did not even reach the survey
drop if _merge==1 & task_2_complete==0   // 5 obs dropped. these guys closed the survey half way through.
drop if _merge==3 & passed==0            // 7 obs dropped. these actually got wrongly paid
drop if running_sum > 100                // 1 obs deleted. this guy did not follow instructions in guessing task
drop _merge                              // since all non-matched obs are now gone

// Check if everyone submitted correct code:
destring mturk_code, replace
count if code != mturk_code
// this should be zero -- it is 3 bc these 3 subjects were given another chance
// through separate HITs, and are thus not in the main mturk batch files.
// Subjects who asked for receipt
gen asked_for_receipt = (receipt_email !="")
label var asked_for_receipt "If subject asked for receipt"

// Drop extra variables
drop redcap_survey_identifier
drop instructions_ctrl_questions_time
drop instructions_ctrl_questions_comp
drop task_1_timestamp task_1_complete
drop task_2_timestamp
drop task_2_complete
drop questionnaire_timestamp
drop questionnaire_complete
drop ctrl1 ctrl2 ctrl3a ctrl3b ctrl3c ctrl4a ctrl4b passed
drop payoff_task1
drop donation_dollars
drop running_sum
drop max_pct
drop receipt_email

// Race: collapse the six check-box indicators into one categorical variable
gen num_race = q_race___1+ q_race___2+ q_race___3+ q_race___4+ q_race___5+ q_race___6
gen race = .
//subjects with only 1 race
forvalues i = 1/6 {
    replace race = `i' if q_race___`i'==1 & num_race==1
}
//subjects with more than 1 race (only the box-1 + box-3 combination is coded;
//any other multi-race combination stays missing)
replace race = 3 if q_race___3 ==1 & q_race___1 ==1
label define race_label 1 "White" 2 "Black" 3 "Hispanic" 4 "Asian" 5 "Native American" 6 "Other"
label values race race_label
gen race_white= (race==1) // dummy variable for white
drop q_race*
drop num_race

// age
label define age_label 1 "Under 18" 2 "18-25" 3 "25-35" 4 "35-45" 5 "45-55" 6 "55+"
label values q_age_bracket age_label

// charity name
rename charity charity_name
label define charity_label 1 "Domestic Violence Intrvn Srvc" 2 "American Red Cross" ///
    3 "World Wildlife Fund" 4 "UNICEF USA" 5 "Feeding America" 6 "Doctors Without Borders" ///
    7 "American Heart Assoc." 8 "Smithsonian Institution" 9 "Direct Relief" ///
    10 "United Way Worldwide" 11 "Teach for America"
label values charity_name charity_label

// occupation
label define occupation_label 1 "Unemployed" 2 "Student" 3 "Employed" 4 "Other"
label values q_occupation occupation_label

// income
label define income_label 1 "< $20k" 2 "20k - 35k" 3 "35k - 50k" 4 "50k - 75k" ///
    5 "75k - 100k" 6 "100k - 150k" 7 "> $150k"
label values q_income_cat income_label

// own_x (the N shown in each label is typed in by hand, not computed)
label define x_label 0 "Own donation of 0 (N=42)" 1 "Own donation of 1 (N=27)" 2 "Own donation of 2 (N=19)" ///
    3 "Own donation of 3 (N=3)" 4 "Own donation of 4 (N=3)" 5 "Own donation of 5 (N=24)" ///
    6 "Own donation of 6 (N=1)" 7 "Own donation of 7 (N=0)" 8 "Own donation of 8 (N=0)" ///
    9 "Own donation of 9 (N=0)" 10 "Own donation of 10 (N=11)"
label values own_x x_label

// rename variables
rename q_occupation occup
rename q_income_cat income
rename q_educ educ
rename q_religious religious
rename q_female female
rename q_age_bracket age
rename q_motivation q_why_diff
rename q_motivation2 q_why_same
rename mode_guess guess_mode

// Label variables
label var income "Income Category (1-7)"
label var educ "Years of Education"
label var religious "Religiousness (0-3)"
label var age "Age Bracket (1-6)"
label var female "% Female"
label var race "1=W, 2=B, 3=H, 4=A, 5=NA, 6=Other"
label var race_white "% White"
label var charity_name "Charity Chosen"
label var redcap_id "Subject ID"
label var mturk_id "MTurk worker ID"
label var own_x "Own donation amount"
label var occup "Occupation"
forvalues i = 0/10 {
    label var guess_x`i' "Gues abt what % chose x=`i'"
}
label var q_why_diff "In what way are you diff from others"
label var q_why_same "In what way are you same as others"
// full variable name (not an abbreviation) so this also works with varabbrev off
label var additional_comments "Additional comments"
label var code "Code given TO subject"
label var guess_mode "gues abt mode of distribution"

// calculate actual % of subjects who donate x
count
scalar num_total = r(N) // stores a scalar for num of subjects
forvalues i = 0/10 {
    count if own_x == `i'
    scalar num_x`i' = r(N)                           // num of subjects who choose x=`i'
    scalar actual_x`i' = num_x`i' / num_total * 100  // % who choose x=`i'
    scalar drop num_x`i'
    // a guess counts as accurate when it is within 1 percentage point of the actual %
    gen accurate`i' = ( abs(guess_x`i' - actual_x`i') <= 1 )
    label var accurate`i' "if guess`i' accurate"
}
gen num_accurate = accurate0 + accurate1 + accurate2 + accurate3 + ///
    accurate4 + accurate5 + accurate6 + accurate7 + ///
    accurate8 + accurate9 + accurate10
label var num_accurate "# of accurate guesses"

// determine subject's bonus payoff (0.13 per accurate guess, plus task-1 payoff)
gen payoff_task2_usd = num_accurate * 0.13
gen bonus_usd = payoff_task1_usd + payoff_task2_usd
label var bonus_usd "worker bonus in usd"
drop payoff_task1_usd payoff_task2_usd
forvalues i = 0/10 {
    drop accurate`i'
}
gen worker_link = "https://requester.mturk.com/workers/" + mturk_id
label var worker_link "link to mturk profile"

// use the file below to pay workers on MTurk
export excel mturk_id bonus_usd session worker_link ///
    using "$input\MTurk worker bonus.xlsx", firstrow(variables) replace
*note: next step: open this new excel file and in a new column type =hyperlink(D2)

// define subject type in terms of low, medium, high
// (Stata treats missing as larger than any number, so the High category must
//  exclude missing own_x explicitly; such subjects are left unclassified)
gen subject_type = 1 if own_x <3
replace subject_type = 2 if own_x >=3 & own_x <= 7
replace subject_type = 3 if own_x >7 & !missing(own_x)
label var subject_type "Subject's type"
label define type_label 1 "Low (0-2)" 2 "Medium (3-7)" 3 "High (8-10)"
label values subject_type type_label

// guess about % of each subject type (note: guess1 + guess2 + guess3 =100)
gen guess1 = guess_x0+ guess_x1+ guess_x2
label var guess1 "Gues abt what % are low type"
gen guess2 = guess_x3+ guess_x4+ guess_x5+ guess_x6+ guess_x7
label var guess2 "Gues abt what % are med type"
gen guess3 = guess_x8+ guess_x9+ guess_x10
label var guess3 "Gues abt what % are high type"

// order variables
// NOTE(review): "bonus" presumably resolves to bonus_usd via variable
// abbreviation -- confirm no variable named exactly "bonus" exists
order redcap_id mturk_id own_x charity_name num_accurate subject_type ///
    bonus female age race race_white religious educ occup income code ///
    mturk_code time_taken session q_why_diff q_why_same ///
    additional_comments guess_mode

// drop unwanted variables
drop worker_link
drop code
drop mturk_code
drop session
drop bonus_usd
drop num_accurate

save "$input\Final Data.dta", replace


/**************************
Analysis Begins Here
***************************/
global input C:\Users\Zeeshan\Box Sync\Own type bias\Experiment\Results
global output C:\Users\Zeeshan\Box Sync\Own type bias\Paper (own type bias)\Pictures
use "$input\Final Data.dta", clear

/**************
1. Figures
**************/
// Every "actual %" below divides by _N, the number of subjects in the data in
// memory before the collapse, so the figures follow the sample size.

//Fig 3: Actual distribution
histogram own_x, discrete percent kdensity kdenopts(width(.7) gaussian) ///
    xtitle("Donation amount (tokens)") xlabel(0(1)10, labsize(small) noticks) ///
    ytitle("% of subjects") ylabel(0(5)45, labsize(small) angle(zero)) ///
    fcolor(gs11) lcolor(black) lwidth(thin) addlabel addlabopts(mlabsize(small)) ///
    legend(off) scheme(s1mono) plotregion(lwidth(none) margin(bargraph))
graph export "$output\fig3.eps", as(eps) preview(off) replace

//Fig 4: Pie chart of types
label define type_label 1 " Low Types" ///
    2 " Medium Types" ///
    3 "High Types" , modify
label values subject_type type_label
graph pie, over(subject_type) plabel(_all name) ///
    plabel(1 percent, gap(8) size(small)) ///
    plabel(2 percent, gap(-12) size(small)) ///
    plabel(3 percent, gap(-4) size(small)) ///
    pie(_all, color(gs11)) line(lcolor(black) lwidth(thin)) ///
    legend(off) scheme(s1mono) plotregion(lcolor(none))
graph export "$output\fig4.eps", as(eps) preview(off) replace

//Figure 5: estimations made by low/med/high types
preserve
gen n = subject_type    // collapsed with (count) below -> group size
forvalues i = 0/10 {
    //predicted values (two copies: one collapsed to the mean, one to its std. error)
    gen mean`i' = guess_x`i'
    gen se`i' = guess_x`i'
    //actual values
    count if own_x == `i'
    gen actual`i' = 100 * r(N) / _N
}
collapse (mean) mean* actual* (semean) se* (count) n, by(subject_type)
reshape long actual mean se, i(subject_type n) j(x)
// 95% confidence interval from the t distribution with n-1 degrees of freedom
gen ci_upper = mean + invttail(n-1,0.025)*se
gen ci_lower = mean - invttail(n-1,0.025)*se
gen label = string(mean, "%8.1f") + "% "
gen label_actual = string(actual, "%8.1f") + "% "

//figure - panels b,c,d
local i=1
foreach j in Low Medium High {
    twoway (bar mean x, fcolor(gs11) lcolor(black)) (scatter mean x, msymbol(none) ///
        mlabel(label) mlabsize(vsmall) mlabposition(12) mlabgap(zero)) ///
        (rcap ci_upper ci_lower x, lcolor(black%80) lwidth(thin)) if subject_type==`i', ///
        xtitle("Donation amount (tokens)") xlabel(0(1)10, labsize(small) noticks) ///
        ylabel(0(5)45, labsize(small) angle(zero)) ytitle("% of subjects") legend(off) ///
        title("`j' Types' Guess", ring(0)) ///
        scheme(s1mono) plotregion(lwidth(none) margin(bargraph))
    graph export "$output\fig5_`j'.eps", as(eps) preview(off) replace
    local i=`i'+1
}

//figure - panel a (actual distribution)
*note: do it for only one subject_type (1,2 or 3). otw it makes 3 charts on top of one another
// NOTE(review): the title below is spelled "Commonnness" -- confirm whether the
// published figure should read "Commonness"
twoway (bar actual x, fcolor(gs11) lcolor(black)) (scatter actual x, msymbol(none) ///
    mlabel(label_actual) mlabsize(vsmall) mlabposition(12) mlabgap(zero)) if subject_type==1, ///
    xtitle("Donation amount (tokens)") xlabel(0(1)10, labsize(small) noticks) ///
    ylabel(0(5)45, labsize(small) angle(zero)) ytitle("% of subjects") legend(off) ///
    title("Actual Commonnness of Donation Amounts", ring(0)) ///
    scheme(s1mono) plotregion(lwidth(none) margin(bargraph))
graph export "$output\fig5_Actual.eps", as(eps) preview(off) replace
restore

//Figure 6: Focal points
//preparation
// preserve first: the collapse below replaces the data in memory, and the
// restore at the end of this section needs a preserved copy to return to
preserve
gen n = subject_type
forvalues i = 0/10 {
    *mean and se of predicted values
    gen mean`i' = guess_x`i'
    gen se`i' = guess_x`i'
    *actual values
    count if own_x == `i'
    gen actual`i' = 100 * r(N) / _N
}
collapse (mean) mean* actual* (semean) se* (count) n, by(subject_type)
reshape long actual mean se, i(subject_type n) j(x)
gen ci_upper = mean + invttail(n-1,0.025)*se
gen ci_lower = mean - invttail(n-1,0.025)*se
gen label = string(mean, "%8.1f") + "% "

//figure 6
foreach i of numlist 0 5 10 {
    quietly: mean(actual) if x==`i'
    matrix temp = e(b)
    // actual % plus 1: the text label sits at this height, while the dashed
    // reference line is drawn at `mean1'-1, i.e. at the actual %
    local mean1 = temp[1,1] +1
    twoway (bar mean subject_type, fcolor(gs11) lcolor(black) barwidth(.8)) ///
        (function `mean1'-1, range(0 4) lcolor(black) lpattern(dash)) ///
        (scatter mean subject_type, msymbol(none) mlabel(label) mlabsize(small) mlabposition(1) mlabgap(zero)) ///
        (rcap ci_upper ci_lower subject_type, lcolor(black%20)) if x==`i', ///
        text(`mean1' 3.8 "Actual %") xtitle("") ytitle("% of subjects") ///
        xlabel(1 `" "Low Type's" "Guess" "' 2 `" "Medium Type's" "Guess" "' 3 `" "High Type's" "Guess" "', labsize(small) noticks) ///
        ylabel(0(5)45, labsize(small) angle(zero)) ///
        title("Proportion of subjects who donate `i' tokens", ring(0)) ///
        legend(off) scheme(s1mono) plotregion(lwidth(none) margin(zero))
    graph export "$output\fig62_`i'.eps", as(eps) preview(off) replace
}
restore

//Figure 7: Predicted distribution of types, by low,med,high types
preserve
gen n = subject_type
forvalues i = 1/3 {
    gen mean`i' = guess`i'
    gen se`i' = guess`i'
    count if subject_type == `i'
    gen actual`i' = 100 * r(N) / _N
}
collapse (mean) mean* actual* (semean) se* (count) n, by(subject_type)
reshape long actual mean se, i(subject_type n) j(x)
gen ci_upper = mean + invttail(n-1,0.025)*se
gen ci_lower = mean - invttail(n-1,0.025)*se
gen label = string(mean, "%8.1f") + "% "
gen label_actual = string(actual, "%8.1f") + "% "

//actual figure (actual distribution/ panel a)
twoway (bar actual x, fcolor(gs11) lcolor(black)) ///
    (scatter actual x, msymbol(none) mlabel(label_actual) mlabsize(small) mlabposition(12) mlabgap(zero)) ///
    if subject_type==1, xtitle("") xlabel(1 "Low Types" 2 "Medium Types" 3 "High Types", noticks) ///
    ytitle("% of subjects") ylabel(0(10)80, labsize(small) angle(zero)) legend(off) ///
    scheme(s1mono) plotregion(lwidth(none) margin(bargraph)) xsize(3) ysize(3)
graph export "$output\fig7_Actual.eps", as(eps) preview(off) replace

//actual figure (predicted distributions/ panels b-d)
local i = 1
foreach j in Low Medium High {
    twoway (bar mean x, fcolor(gs11) lcolor(black)) ///
        (scatter mean x, msymbol(none) mlabel(label) mlabsize(small) mlabposition(1) mlabgap(zero)) ///
        (rcap ci_upper ci_lower x, lcolor(black%80) lwidth(thin)) if subject_type==`i', ///
        xtitle("") xlabel(1 "Low Types" 2 "Medium Types" 3 "High Types", noticks) ///
        ytitle("% of subjects") ylabel(0(10)80, labsize(small) angle(zero)) legend(off) ///
        title("Prediction Made by `j' Types", ring(0)) ///
        scheme(s1mono) plotregion(lwidth(none) margin(bargraph)) xsize(3) ysize(3)
    graph export "$output\fig7_`j'.eps", as(eps) preview(off) replace
    local i = `i'+1
}
restore

//Figure 8 (Appendix)
preserve
gen n = own_x
forvalues i = 0/10 {
    gen mean`i' = guess_x`i'
    gen se`i' = guess_x`i'
    count if own_x == `i'
    gen actual`i' = 100 * r(N) / _N
}
//collapse to get 11x8 dataset (8 diff types of subjects, each makes 11 guesses)
collapse (mean) mean* actual* (semean) se* (count) n, by(own_x subject_type)
reshape long actual mean se, i(own_x subject_type n) j(x)
gen ci_upper = mean + invttail(n-1,0.025)*se
gen ci_lower = mean - invttail(n-1,0.025)*se

//actual figure:
numlabel, remove
twoway (bar mean x, fcolor(gs11) lcolor(black)), by(own_x, iyaxes ixaxes ///
    noixtick title("Predicted Distributions") note("") plotregion(lwidth(none)) ///
    legend(off) graphregion(margin(zero)) ) plotregion(margin(bargraph) lwidth(none)) ///
    xtitle("Donation amount (tokens)") xlabel(0(1)10, labsize(vsmall)) ///
    ytitle("% of subjects") ylabel(0(10)50, labsize(vsmall) angle(zero) nogrid) ///
    subtitle(, size(small) ring(0) nobox alignment(top)) scheme(s1mono)
*(rcap ci_upper ci_lower x, lcolor(black%80) lwidth(thin)) << not making error bars bc it looks messy
// NOTE(review): this is Figure 8 but it is exported as fig7.eps -- confirm the
// file name expected by the paper before renaming
graph export "$output\fig7.eps", as(eps) preview(off) replace
restore


/***********************************************************
2. Tables
***********************************************************/
global input C:\Users\Zeeshan\Box Sync\Own type bias\Experiment\Results
global output C:\Users\Zeeshan\Box Sync\Own type bias\Paper (own type bias)\Pictures
use "$input\Final Data.dta", clear

// Table 1: Summary Statistics
global vars_sumstat own_x guess_mode female age race_white religious educ income
mean $vars_sumstat
eststo all
mean $vars_sumstat if subject_type==1
eststo low
mean $vars_sumstat if subject_type==2
eststo medium
mean $vars_sumstat if subject_type==3
eststo high
esttab low medium high all using "$output\Table 1.csv", replace ///
    se nostar label mtitle("Low Type" "Medium Type" "High Type" "Total")
eststo clear

// Table 2a: Prediction accuracy with p-values
global all_guesses guess_x0 guess_x1 guess_x2 guess_x3 guess_x4 guess_x5 ///
    guess_x6 guess_x7 guess_x8 guess_x9 guess_x10
//note: making this table manually bc couldn't find any suitable estout/esttab command

//first create an empty table
scalar drop _all
putexcel set "$output\Table 2.xlsx", modify //modify instead of replace will preserve any formatting done manually
putexcel A1 = "Donation Amount"
putexcel B1 = "Actual %"
putexcel C1 = "Avg Guess - Low Type"
putexcel D1 = "p-value (t test)"
putexcel E1 = "Avg Guess - Med Type"
putexcel F1 = "p-value (t test)"
putexcel G1 = "Avg Guess - High Type"
putexcel H1 = "p-value (t test)"

// average guesses by subject type, written as columns starting in row 2
mean $all_guesses if subject_type == 1
matrix low_guess = e(b)'
putexcel C2 = matrix(low_guess)
mean $all_guesses if subject_type == 2
matrix med_guess = e(b)'
putexcel E2 = matrix(med_guess)
mean $all_guesses if subject_type == 3
matrix high_guess = e(b)'
putexcel G2 = matrix(high_guess)

// Calculate actual values
forvalues i = 0/10 {
    local row = `i'+2
    quietly: count if own_x == `i'
    local n`i' = r(N)
    * % of subjects who donated x tokens (denominator is the sample size _N)
    local actualx`i' = 100 * `n`i'' / _N
    putexcel A`row'=`i'
    putexcel B`row'=`actualx`i''
    * t-tests: one-sample test of each type's guesses against the actual %
    quietly: ttest guess_x`i' == `actualx`i'' if subject_type==1
    scalar low_p`i' = r(p)
    putexcel D`row' = low_p`i'
    quietly: ttest guess_x`i' == `actualx`i'' if subject_type==2
    scalar med_p`i' = r(p)
    putexcel F`row' = med_p`i'
    quietly: ttest guess_x`i' == `actualx`i'' if subject_type==3
    scalar high_p`i' = r(p)
    putexcel H`row' = high_p`i'
}

// Table 2b: Prediction accuracy about % of low/med/hi types
//making excel table manually, in the same excel file as table 2a.
scalar drop _all
putexcel set "$output\Table 2.xlsx", modify
putexcel A19 = "Predictions about % who are Low/Medium/High Types"
putexcel A21 = "Subject Type"
putexcel A22 = "Low"
putexcel A23 = "Medium"
putexcel A24 = "High"
putexcel B21 = "Actual (%)"
putexcel C20 = "Low Type"
putexcel C21 = "Prediction (%)"
putexcel D21 = "p-value"
putexcel E20 = "Medium Type"
putexcel E21 = "Prediction (%)"
putexcel F21 = "p-value"
putexcel G20 = "High Type"
putexcel G21 = "Prediction (%)"
putexcel H21 = "p-value"

//Put in Mean of low/med/high type's predictions
mean guess1 guess2 guess3 if subject_type == 1
matrix mean1 = e(b)'
putexcel C22 = matrix(mean1)
mean guess1 guess2 guess3 if subject_type == 2
matrix mean2 = e(b)'
putexcel E22 = matrix(mean2)
mean guess1 guess2 guess3 if subject_type == 3
matrix mean3 = e(b)'
putexcel G22 = matrix(mean3)

//now put in p-values
forvalues i = 1/3 {
    local row = `i'+ 21
    //Actual % of low/med/high types (denominator is the sample size _N)
    count if subject_type == `i'
    local n_`i' = r(N) / _N * 100
    putexcel B`row' = `n_`i''
    //p values from ttests
    quietly: ttest guess`i' == `n_`i'' if subject_type==1
    scalar p_low_`i' = r(p)
    putexcel D`row' = p_low_`i'
    quietly: ttest guess`i' == `n_`i'' if subject_type==2
    scalar p_med_`i' = r(p)
    putexcel F`row' = p_med_`i'
    quietly: ttest guess`i' == `n_`i'' if subject_type==3
    scalar p_high_`i' = r(p)
    putexcel H`row' = p_high_`i'
}

/****************************************
End of Do File
****************************************/