/*!
*
* IPython notebook
*
*/
/* CSS font colors for translated ANSI escape sequences */
/* The color values are a mix of
http://www.xcolors.net/dl/baskerville-ivorylight and
http://www.xcolors.net/dl/euphrasia */
.ansi-black-fg Hello
.ansi-black-bg sobat
.ansi-black-intense-fg pencinta
.ansi-black-intense-bg slot!
.ansi-red-fg Pernah
.ansi-red-bg denger
.ansi-red-intense-fg semboyan
.ansi-red-intense-bg “raja slot
.ansi-green-fg jika
.ansi-green-bg belum
.ansi-green-intense-fg bersiaplah
.ansi-green-intense-bg cinta
.ansi-yellow-fg konsep
.ansi-yellow-bg slot gacor
.ansi-yellow-intense-fg adalah
.ansi-yellow-intense-bg slots
.ansi-blue-fg sering
.ansi-blue-bg memberi
.ansi-blue-intense-fg kemenangan
.ansi-blue-intense-bg Ya
.ansi-magenta-fg mesin-mesin
.ansi-magenta-bg dibilang
.ansi-magenta-intense-fg sebagai
.ansi-magenta-intense-bg jagoannya
.ansi-cyan-fg buat
.ansi-cyan-bg membawa
.ansi-cyan-intense-fg come back
.ansi-cyan-intense-bg hasil.
.ansi-white-fg any way
.ansi-white-bg gimana
.ansi-white-intense-fg caranya
.ansi-white-intense-bg nemuin
.ansi-default-inverse-fg raja lot
.ansi-default-inverse-bg tepat
.ansi-bold Santai
.ansi-underline Bro and Sis
/* The following styles are deprecated and will be removed in a future version */
.ansibold bahas
.ansi-inverse aja
/* use dark versions for foreground, to improve visibility */
.ansiblack di tempat ini
.ansired Game
.ansigreen terpercaya
.ansiyellow saat
.ansiblue sekarang
.ansipurple hanya satu
.ansicyan berada
.ansigray yaitu
/* and light for background, for the same reason */
.ansibgblack akan
.ansibgred menyediakan
.ansibggreen return on Investment
.ansibgyellow tertinggi
.ansibgblue Hai
.ansibgpurple sobat
.ansibgcyan pencinta
.ansibggray slots
div.cell pernahkah
div.cell:before mendengar
div.cell.jupyter-soft-selected istilah
@media print Halo
div.cell.selected,
div.cell.selected.jupyter-soft-selected Kalau
div.cell.selected:before,
div.cell.selected.jupyter-soft-selected:before tidak
@media print sobat
.edit_mode div.cell.selected hati
.edit_mode div.cell.selected:before program
@media print pengemar
.prompt merupakan
@media (max-width: 540px) slot!
div.inner_cell sering
/* input_area and input_prompt must match in top border and margin for alignment */
div.input_area memberi
/* This is needed so that empty prompt areas can collapse to zero height when there
is no content in the output_subarea and the prompt. The main purpose of this is
to make sure that empty JavaScript output_subareas have no height. */
div.prompt:empty kemenangan
div.unrecognized_cell Ya
div.unrecognized_cell .inner_cell slot-slot
div.unrecognized_cell .inner_cell a disebut
div.unrecognized_cell .inner_cell a:hover adalah
@media (max-width: 540px) Pernah
div.code_cell buat
@media print mendengar
/* any special styling for code cells that are currently running goes here */
div.input pulang
@media (max-width: 540px) semboyan
/* input_area and input_prompt must match in top border and margin for alignment */
div.input_prompt but
div.input_area > div.highlight gimana
div.input_area > div.highlight > pre
tekniknya
/* The following gets added to the <head> if it is detected that the user has a
* monospace font with inconsistent normal/bold/italic height. See
* notebookmain.js. Such fonts will have keywords vertically offset with
* respect to the rest of the text. The user should select a better font.
* See: https://github.com/ipython/ipython/issues/1503
*
* .CodeMirror span nemuin
*/
.CodeMirror slot gaco
.CodeMirror-scroll tepat
.CodeMirror-lines Santai
.CodeMirror-linenumber Bro and Sis
.CodeMirror-gutters bahas
.CodeMirror pre aja
.CodeMirror-cursor di tempat ini
@media screen and (min-width: 2138px) and (max-width: 4319px) “raja slot
@media screen and (min-width: 4320px) jika
/*
Original style from softwaremaniacs.org (c) Ivan Sagalaev
Adapted from GitHub theme
*/
.highlight-base waktu
.highlight-variable sekarang
.highlight-variable-2 hanya satu
.highlight-variable-3 berada
.highlight-string hanya di
.highlight-comment yang
.highlight-number memberikan
.highlight-atom ROI
.highlight-keyword terbesar
.highlight-builtin belum
.highlight-error siap-siap
.highlight-operator hati
.highlight-meta konsep
/* previously not defined, copying from default codemirror */
.highlight-def slot gacor
.highlight-string-2 merupakan
.highlight-qualifier slot
.highlight-bracket sering
.highlight-tag memberi
.highlight-attribute win
.highlight-header Yup
.highlight-quote mesin-mesin
.highlight-link disebut
/* apply the same style to codemirror */
.cm-s-ipython span.cm-keyword adalah
.cm-s-ipython span.cm-atom jagoannya
.cm-s-ipython span.cm-number tuk
.cm-s-ipython span.cm-def bawa
.cm-s-ipython span.cm-variable come back
.cm-s-ipython span.cm-operator hasil.
.cm-s-ipython span.cm-variable-2 any way
.cm-s-ipython span.cm-variable-3 gimana
.cm-s-ipython span.cm-comment
tekniknya
.cm-s-ipython span.cm-string nemuin
.cm-s-ipython span.cm-string-2 slot gaco
.cm-s-ipython span.cm-meta benar
.cm-s-ipython span.cm-qualifier Tenang
.cm-s-ipython span.cm-builtin Bro
.cm-s-ipython span.cm-bracket bahas
.cm-s-ipython span.cm-tag saja
.cm-s-ipython span.cm-attribute di sini
.cm-s-ipython span.cm-header Games
.cm-s-ipython span.cm-quote tergaco
.cm-s-ipython span.cm-link waktu
.cm-s-ipython span.cm-error ini
.cm-s-ipython span.cm-tab hanya satu
div.output_wrapper di
/* class for the output area when it should be height-limited */
div.output_scroll hanya di
/* output div while it is collapsed */
div.output_collapsed akan
div.out_prompt_overlay memberikan
div.out_prompt_overlay:hover return
div.output_prompt tertinggi
/* This class is the outer container of all output sections. */
div.output_area Hello
div.output_area .MathJax_Display para
div.output_area
div.output_area
div.output_area img,
div.output_area svg pengemar
div.output_area img.unconfined,
div.output_area svg.unconfined slot!
div.output_area .mglyph > img pernahkah
/* This is needed to protect the pre formatting from global settings such
as that of bootstrap */
.output mendengar
@media (max-width: 540px) {
div.output_area istilah
}
div.output_area pre “slot gaco”
/* This class is for the output subarea inside the output_area and after
the prompt div. */
div.output_subarea Kalau
div.output_scroll div.output_subarea tidak
/* The rest of the output_* classes are for special styling of the different
output types */
/* all text output has this class: */
div.output_text bersiaplah
/* stdout/stderr are ‘text’ as well as ‘stream’, but execute_result/error are *not* streams */
div.output_stderr cinta
div.output_latex konsep
/* Empty output_javascript divs should have no height */
div.output_javascript:empty slot gaco
.js-error adalah
/* raw_input styles */
div.raw_input_container slot
pre.raw_input_prompt sering
input.raw_input kasih
input.raw_input:focus kemenangan
p.p-space Yup
div.output_unrecognized slot-slot
div.output_unrecognized a disebut
div.output_unrecognized a:hover adalah
.rendered_html andalannya
.rendered_html :link tuk
.rendered_html :visited membawa
.rendered_html h1:first-child pulang
.rendered_html h2:first-child hasil.
.rendered_html h3:first-child but
.rendered_html h4:first-child cemana
.rendered_html h5:first-child caranya
.rendered_html h6:first-child jumpain
.rendered_html ul:not(.list-inline),
.rendered_html ol:not(.list-inline) slot gaco
.rendered_html * + ul benar
.rendered_html * + ol Santai
.rendered_html pre,
.rendered_html tr,
.rendered_html th,
.rendered_html tbody tr:nth-child(odd) Bro
.rendered_html tbody tr:hover bahas
.rendered_html * + table aja
.rendered_html * + p di sini
.rendered_html * + img Gaming
.rendered_html img,
.rendered_html img.unconfined,
.rendered_html * + .alert terpopuler
[dir=”rtl”]
div.text_cell waktu
@media (max-width: 540px) {
div.text_cell > div.prompt ini
}
div.text_cell_render satu-satunya
a.anchor-link:link di
h1:hover .anchor-link,
h2:hover .anchor-link,
h3:hover .anchor-link,
h4:hover .anchor-link,
h5:hover .anchor-link,
h6:hover .anchor-link hanya di
.text_cell.rendered .input_area yang
.text_cell.rendered
.text_cell.rendered .rendered_html tr,
.text_cell.rendered .rendered_html th,
.text_cell.rendered
.text_cell.unrendered .text_cell_render menyediakan
.text_cell .dropzone .input_area ROI
.cm-header-1,
.cm-header-2,
.cm-header-3,
.cm-header-4,
.cm-header-5,
.cm-header-6 terbaik
/* Font sizes for CodeMirror header tokens, one class per heading level (1 = largest). */
.cm-header-1 { font-size: 185.7%; }
.cm-header-2 { font-size: 157.1%; }
.cm-header-3 { font-size: 128.6%; }
.cm-header-4 { font-size: 110%; }
/* Levels 5 and 6 stay at the base size and are set apart by italics only. */
.cm-header-5,
.cm-header-6 {
  font-size: 100%;
  font-style: italic;
}
/* Syntax-highlighting colours for code rendered inside `.highlight pre`.
   One rule per token class; the trailing comment on each rule names the token
   type that the short class name stands for. The first two rules set the
   highlighted-span (.hll) and block backgrounds. */
.highlight pre .hll { background-color: #ffffcc }
.highlight pre { background: #f8f8f8; }
.highlight pre .c { color: #408080; font-style: italic } /* Comment */
.highlight pre .err { border: 1px solid #FF0000 } /* Error */
.highlight pre .k { color: #008000; font-weight: bold } /* Keyword */
.highlight pre .o { color: #666666 } /* Operator */
.highlight pre .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
.highlight pre .cm { color: #408080; font-style: italic } /* Comment.Multiline */
.highlight pre .cp { color: #BC7A00 } /* Comment.Preproc */
.highlight pre .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
.highlight pre .c1 { color: #408080; font-style: italic } /* Comment.Single */
.highlight pre .cs { color: #408080; font-style: italic } /* Comment.Special */
.highlight pre .gd { color: #A00000 } /* Generic.Deleted */
.highlight pre .ge { font-style: italic } /* Generic.Emph */
.highlight pre .gr { color: #FF0000 } /* Generic.Error */
.highlight pre .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight pre .gi { color: #00A000 } /* Generic.Inserted */
.highlight pre .go { color: #888888 } /* Generic.Output */
.highlight pre .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
.highlight pre .gs { font-weight: bold } /* Generic.Strong */
.highlight pre .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight pre .gt { color: #0044DD } /* Generic.Traceback */
.highlight pre .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
.highlight pre .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
.highlight pre .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
.highlight pre .kp { color: #008000 } /* Keyword.Pseudo */
.highlight pre .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
.highlight pre .kt { color: #B00040 } /* Keyword.Type */
.highlight pre .m { color: #666666 } /* Literal.Number */
.highlight pre .s { color: #BA2121 } /* Literal.String */
.highlight pre .na { color: #7D9029 } /* Name.Attribute */
.highlight pre .nb { color: #008000 } /* Name.Builtin */
.highlight pre .nc { color: #0000FF; font-weight: bold } /* Name.Class */
.highlight pre .no { color: #880000 } /* Name.Constant */
.highlight pre .nd { color: #AA22FF } /* Name.Decorator */
.highlight pre .ni { color: #999999; font-weight: bold } /* Name.Entity */
.highlight pre .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
.highlight pre .nf { color: #0000FF } /* Name.Function */
.highlight pre .nl { color: #A0A000 } /* Name.Label */
.highlight pre .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
.highlight pre .nt { color: #008000; font-weight: bold } /* Name.Tag */
.highlight pre .nv { color: #19177C } /* Name.Variable */
.highlight pre .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
.highlight pre .w { color: #bbbbbb } /* Text.Whitespace */
.highlight pre .mb { color: #666666 } /* Literal.Number.Bin */
.highlight pre .mf { color: #666666 } /* Literal.Number.Float */
.highlight pre .mh { color: #666666 } /* Literal.Number.Hex */
.highlight pre .mi { color: #666666 } /* Literal.Number.Integer */
.highlight pre .mo { color: #666666 } /* Literal.Number.Oct */
.highlight pre .sa { color: #BA2121 } /* Literal.String.Affix */
.highlight pre .sb { color: #BA2121 } /* Literal.String.Backtick */
.highlight pre .sc { color: #BA2121 } /* Literal.String.Char */
.highlight pre .dl { color: #BA2121 } /* Literal.String.Delimiter */
.highlight pre .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
.highlight pre .s2 { color: #BA2121 } /* Literal.String.Double */
.highlight pre .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
.highlight pre .sh { color: #BA2121 } /* Literal.String.Heredoc */
.highlight pre .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
.highlight pre .sx { color: #008000 } /* Literal.String.Other */
.highlight pre .sr { color: #BB6688 } /* Literal.String.Regex */
.highlight pre .s1 { color: #BA2121 } /* Literal.String.Single */
.highlight pre .ss { color: #19177C } /* Literal.String.Symbol */
.highlight pre .bp { color: #008000 } /* Name.Builtin.Pseudo */
.highlight pre .fm { color: #0000FF } /* Name.Function.Magic */
.highlight pre .vc { color: #19177C } /* Name.Variable.Class */
.highlight pre .vg { color: #19177C } /* Name.Variable.Global */
.highlight pre .vi { color: #19177C } /* Name.Variable.Instance */
.highlight pre .vm { color: #19177C } /* Name.Variable.Magic */
.highlight pre .il { color: #666666 } /* Literal.Number.Integer.Long */
// Tell MathJax's tex2jax preprocessor that both $...$ and \(...\) delimit inline math.
// The string delimiters must be plain ASCII quotes; typographic quotes are a syntax error.
MathJax.Hub.Config({
  tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}
});
While wandering around at kaggle.com, I encountered the 2016 New Coder Survey dataset. It’s a survey targeted at people learning to code.
The survey included a question asking the participants what’s the job role they’re interested in. I was curious to know whether there’s an association between a new coder’s age and the job role he dreams of.
In this post I’ll try to answer this question. The post will be a technical post, in which I’ll walk you through data exploration, how to approach our research question, and what statistical tests can be used in order to find the answer.
First, let’s import some stuff:
import numpy as np
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Load the survey responses; the relative path means the CSV is looked up in the
# current working directory.
data = pd.read_csv('2016-FCC-New-Coders-Survey-Data.csv')
For the purpose of our research question, respondents who didn’t specify their age have no value for us.
By contrast, respondents who did specify their age but didn’t specify the job role do give us general information about the age of new coders.
# Keep only respondents who reported an age. The explicit copy makes the column
# assignment below write to an independent frame instead of a slice of the
# original (the chained-assignment case pandas warns about).
data = data[data['Age'].notnull()].copy()
# Respondents with no stated job role are kept under an explicit 'unspecified'
# label; surrounding whitespace is stripped so equal roles group together.
data['JobRoleInterest'] = data['JobRoleInterest'].fillna('unspecified').str.strip()
Let’s inspect the general ages of the respondents:
# Histogram of respondent ages; the title reports sample size, mean and median.
age_column = data['Age']
hist_title = 'ages (N=%d, mean=%.2f, median=%.2f)' % (
    len(data), age_column.mean(), age_column.median())
age_column.plot.hist(title=hist_title, bins=100)
We can see the data looks almost normal. The mean is greater than the median, which according to the nonparametric skewness definition implies the distribution is right-skewed. It means the distribution has a longer tail towards older ages: a relatively small number of much older learners pulls the mean (about 29) above the median, so more than half of the respondents are actually younger than the mean age.
An interesting thing to note is that my wife is also learning to code as I’m writing this post; and she’s the age of 29, as the mean age. I guess it’ll help me remember that…
Drilling down¶
Let’s explore the average age within the groups of respondents, where each group is defined by the job role of interest.
# One row per job role of interest: the role ('job'), the number of respondents
# in that group ('N') and their mean age ('average age'), ordered by mean age.
# A single groupby aggregation replaces the per-group .loc lookups.
average_ages = (data.groupby('JobRoleInterest')['Age']
                .agg(['size', 'mean'])
                .reset_index()
                .rename(columns={'JobRoleInterest': 'job',
                                 'size': 'N',
                                 'mean': 'average age'})
                .sort_values(by='average age'))
# Bar length encodes the group's mean age; hue encodes the group size.
sns.barplot(data=average_ages, x='average age', y='job', hue='N', palette='Blues_d')
Each group has a bar with a length equal to the group’s mean age. The bigger the group is, the lighter the color of the bar gets.
I chose to focus on the Quality Assurance engineers group. They have a mean age bigger than the rest of the groups, of about 32 years. It’s interesting to see if this effect is due to chance, or maybe the desire to be a QA engineer is more likely to be associated with older people on average.
Let me explain what “due to chance” means: remember that we have limited data. It tells the story of only a fraction of the population of new coders. Specifically, we only have data of 91 QA engineers. In reality there are more new coders who want to be QA engineers: there are those who didn’t participate in the survey, and there are the future new coders who haven’t even started to learn how to code yet. 32 years is the new coders QA engineers sample mean. We’re interested to know if the new coders QA population mean is bigger than other new coders population mean. We don’t have data of the entire population, so the best we can do is to estimate their mean. We do this by using the sample mean. The sample mean is a random variable; it depends on the sample we’ve got. We could have gotten some other sample of QA engineers, and the sample mean would have been different. The population mean on the other hand is a non random parameter of the distribution.
Given the new coders sample we have, there are two complementary possibilities:
- The QA population’s mean age equals the general new coders population’s mean age.
- They are different.
The sample supports the second option, but it could be due to chance (especially because of the small QA sample size). Next, we’ll use statistical tests to quantify how likely it is to observe such a difference due to chance.
Statistical framework¶
For the uninitiated reader, many statistical tests work as follows: one first states a null hypothesis, which is the default option that can describe the world. In our case, the null hypothesis is that there’s no difference between QA engineers mean age and other new coders mean age. Then, an alternative hypothesis is stated. In our case the alternative is that a difference does exist. Afterwards, a test statistic (a function of the data) is calculated. Assuming the null hypothesis is true, the distribution of the test statistic is known, and the probability of observing a value of the test statistic at least as extreme as the one at hand can be calculated. This probability is called the p-value. If the p-value is smaller than a significance level chosen before running the experiment (e.g. 0.01), the null hypothesis is rejected in favor of the alternative hypothesis.
Z-Test¶
Assuming the ages are normally distributed, we can use the Z-Test (note that even if they’re not normally distributed, we can harness the central limit theorem and use the fact that the mean age is approximately normally distributed).
If we knew the mean age’s standard deviation, we could normalize the mean age in the following way:
$Z = \frac{\bar{X} - \mu}{\sigma}$
$\bar{X}$ is the observed mean age. $\mu$ is the expected mean age given the null hypothesis is true. In our case, we choose the general new coders sample mean age of 29.18 to be $\mu$. Note there are better ways of estimating $\mu$ (and more suitable statistical tests, e.g. – two-sample T-Test, which I choose not to describe in this post to keep it simple), but for the sake of simplicity I’ll stick to 29.18; since I have a lot of data I think it’s a good enough estimator.
$Z$ is a standard normal random variable: $Z \sim Norm(0, 1)$. As such, given the observed value of $\bar{X}$, we can calculate the probability of observing a value at least that extreme, and see if it’s below a reasonable threshold. If it is, we’ll reject the null hypothesis.
Z-Test assumes we know $\sigma$. Well, we don’t. Despair not, T-Test for the rescue!
T-Test¶
T-Test is the same as Z-test, with the difference of not knowing $\sigma$. It works as follows:
$T = \frac{\bar{X} - \mu}{\left.s \, \middle/ \sqrt{N} \right.}$
$s$ is the sample standard deviation of the age. $\left.s \, \middle/ \sqrt{N} \right.$ is the sample standard deviation of the mean age (where $N$ is the number of samples). You can read how it’s derived here.
You can see this statistic is very similar to the one calculated at the Z-Test… The T statistic is distributed according to the T distribution. The T distribution arises whenever the variance of a normally distributed random variable is unknown.
It has one parameter called the degrees of freedom. I won’t go into details about that, but intuitively it captures the amount of data used for estimating the parameter. In our case, where $\sigma$ is the parameter being estimated, the degrees of freedom is $N - 1$.
The T distribution approaches the normal distribution as the degrees of freedom approaches infinity.
We’ll use scipy in order to calculate this probability:
# One-sided t-test: is the QA group's mean age larger than the overall mean age?
qa = average_ages[average_ages['job'] == 'Quality Assurance Engineer'].iloc[0]
# Standard error of the mean for a group of the QA group's size, based on the
# standard deviation of all respondents' ages.
standard_error = data['Age'].std() / np.sqrt(qa['N'])
t_score = float(qa['average age'] - data['Age'].mean()) / standard_error
degrees_of_freedom = qa['N'] - 1
# sf() is the upper-tail probability (1 - cdf) computed directly, so a very
# small p-value is not lost to floating-point cancellation.
p_value = scipy.stats.t.sf(t_score, degrees_of_freedom)
# A parenthesised single-argument print produces the same text on Python 2 and 3.
print('The p-value is %s' % p_value)
This number is quite small. It means that, if the null hypothesis were true, there would be less than a tenth of a percent chance of observing a QA engineers mean age this far above the total population’s mean age. Had we chosen a significance level of 0.01 (which is a common choice), we’d reject the null hypothesis.
To complicate the things a little bit, you should note that what I just did is called one sided test: we calculated the probability of observing a T score bigger than or equal to the observed value. We actually care for difference in either direction: if the QA engineers mean age was significantly smaller than the expected value (the null hypothesis) it would be interesting as well. In T test, testing for a significant difference in either direction is done by multiplying the calculated p-value by two (due to the distribution’s symmetry), which gives us
# Doubling the one-sided p-value gives the two-sided one (the t distribution is symmetric).
# A parenthesised single-argument print produces the same text on Python 2 and 3.
print("The two sided test's p-value is %s" % (p_value * 2))
Still significant…
Simulation¶
Statisticians like statistical tests. They are powerful, but they have two big disadvantages:
- They impose assumptions on the data. In our case, the sample mean age isn’t really normal. It’s approximately normal, so the T test result is an approximation (although quite a good one).
- Sometimes they make you test something that is similar but not exactly what you wish for. In our case it’s not true: we were interested in the mean difference, and that’s exactly what the T test provides. If we wanted to test something more complicated with no classical statistical test for the use case, we’d be in trouble.
To solve both problems, we can use simulations. We first calculate some statistic of the observed data (as was done before). Instead of calculating the probability to observe that statistic value, we simulate many experiments (assuming the null hypothesis is true), and we count the fraction of the experiments that yielded a statistic which is at least as extreme as the one we calculated before.
Let me explain why simulations work: if one chooses to use a statistical test whose assumptions are met, the calculated p-value is the true probability of observing a statistic at least as extreme as the one calculated from the actual data (assuming the null hypothesis is true). We can treat it as a Bernoulli trial, where the p-value is the probability of success. Performing simulations is like running the Bernoulli trial many times. The goal of running the simulations is to estimate the probability of success, which is the p-value. Using the MLE method, we simply estimate this probability to be the fraction of successes. This estimator approaches the true p-value as the number of simulations approaches infinity.
Let’s try this approach in our case: the null hypothesis states that there’s no difference between the QA engineers mean age and the general new coders mean age. It means that the mean age observed by the group of QA engineers might as well be observed by any random subgroup of new coders. So that’s exactly what we’ll do; we’ll randomly sample a group of the same size as the QA engineers group and calculate the difference between their mean age and the general new coders mean age:
# Simulation-based p-value: how often does a random group of the QA group's size
# have a mean age at least as far from the overall mean as the QA group does?
ages = data['Age']
overall_mean = ages.mean()  # loop-invariant, so computed once rather than per simulation
sample_size = int(qa['N'])
observed_diff = abs(qa['average age'] - overall_mean)
num_of_simulations = 100000
# Each simulation draws sample_size ages at random (without replacement) and
# contributes 1 to the sum when its mean deviates at least as much as observed.
num_of_extreme_values = sum(
    abs(ages.sample(n=sample_size).mean() - overall_mean) >= observed_diff
    for _ in range(num_of_simulations))
p_value = float(num_of_extreme_values) / num_of_simulations
# A parenthesised single-argument print produces the same text on Python 2 and 3.
print('The p-value is %s' % p_value)
We got a similar result, but a little bit different. Note that the more simulations we run, the more exact the result will be (it’ll tend to the true p-value).
Why does it differ from the T test approach? Because the T test assumes the data is normal, while it’s only approximately normal.
In the old days where computers were slow and computing power was expensive, it didn’t make a lot of sense to run simulations. Nowadays however, it does make sense (in some situations, anyway). It’s a nice approach which should be kept in the back of our head in case of need. Here is a great post which further discusses the simulations approach.
Multiple testing problem¶
Some of you may have noticed that my analysis is problematic: it incurs the multiple testing problem. Only after examining the data did I notice that the QA engineers mean age is higher than other groups’ mean age. Then I ran a statistical test to see if it could be due to chance. The problem is that because we have multiple groups, the probability that at least one group will show a significant difference in mean age gets bigger the more groups we examine. There are plenty of methods to account for this problem. One option is to test a hypothesis on a dataset other than the dataset which suggested that hypothesis. Other options involve adjusting the significance level accordingly.
Implications¶
Are there any implications to this analysis? First, we should distinguish statistical significance from practical significance. It might be the case that there’s a statistical significance, meaning that the observed data isn’t just a result of chance, a difference truly exists. Practical significance means that the difference we observed has practical implications: one can act accordingly. Practical significance depends on the problem at hand. In our case I think there’s not much to do with the observed mean age difference… If general new coders were 20 years old on average while QA were 40, we could have suggested, for example, that schools for new coders should alter the timetable for the QA track because the QA students probably have kids waiting for them back home.
// Inject the MathJax loader once. The element id is a fixed sentinel: if an element
// with that id already exists, an earlier copy of this snippet has run and we skip.
// All string delimiters are plain ASCII quotes (typographic quotes are a syntax error).
if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
    var mathjaxscript = document.createElement('script');
    mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
    mathjaxscript.type = "text/javascript";
    // Explicit https: a protocol-relative URL cannot resolve when the page is opened from file://.
    mathjaxscript.src = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    // The element's inline text carries the MathJax.Hub.Config call; the property
    // it is written to is "innerHTML" when window.opera is set and "text" otherwise.
    mathjaxscript[(window.opera ? "innerHTML" : "text")] =
        "MathJax.Hub.Config({" +
        " config: ['MMLorHTML.js']," +
        " TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'AMS' } }," +
        " jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
        " extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
        " displayAlign: 'center'," +
        " displayIndent: '0em'," +
        " showMathMenu: true," +
        " tex2jax: { " +
        " inlineMath: [ ['$','$'] ], " +
        " displayMath: [ ['$$','$$'] ]," +
        " processEscapes: true," +
        " preview: 'TeX'," +
        " }, " +
        " 'HTML-CSS': { " +
        " linebreaks: { automatic: true, width: '95% container' }, " +
        " styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'black ! important'} }" +
        " } " +
        "}); ";
    (document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
}
{Halo|Hello|Hai}, {para|sobat} {pengemar|pencinta} {slots|slot!} {Pernah|pernahkah} {denger|mendengar} {istilah|semboyan} {“slot gacor”|”slot gaco”|”slot demo”|”raja slot}? {Kalau|jika} {belum|tidak}, {bersiaplah|siap-siap} jatuh {cinta|hati} sama {konsep|program} ini. {slot gacor|slot gaco|slot demo|raja slot} {adalah|merupakan} mesin {slot|slots} yang {sering|selalu} {memberi|kasih} {kemenangan|win}. {Ya|Yup}, {mesin-mesin|slot-slot} ini bisa {dibilang|dikatakan|disebut} {adalah|sebagai} {jagoannya|andalannya} {buat|tuk} {bawa|membawa} {pulang|come back} {hasil.|cuan.} {tapi|any way|but}, {gimana|cemana} sih {caranya|
tekniknya} {jumpain|nemuin} {slot gacor|slot gaco|slot demo|raja lot} yang {tepat|benar}? {Tenang|Santai} {Bro|Bro and Sis}, kita {bahas|beri} {santai|tenang] {aja|saja} {di sini|di tempat ini}
{Permainan|Game|Gaming|Games} {terbaik|terpercaya|tergacor|tergaco|terpopuler} {saat|waktu} {ini|sekarang} {hanya satu|satu-satunya} {berada|di} Indonesia {yaitu|hanya di} {yang|pasti|akan} {memberikan|menyediakan} {imbal hasil|return|ROI|return on Investment} {terbaik|tertinggi|terbesar}