Logo

PEARSON_q

Updated 2023-11-01 21:06:02.320000

Syntax

SELECT [westclintech].[wct].[PEARSON_q] (
   <@Known_y_Known_x_RangeQuery, nvarchar(4000),>)

Description

Use the scalar function PEARSON_q to calculate the product moment correlation coefficient between two datasets. The equation for the Pearson correlation coefficient is

r_{xy}=\frac{\sum(x-\bar{x})(y-\bar{y})}{\sqrt{\sum(x-\bar{x})^2\sum(y-\bar{y})^2}}

Arguments

@Known_y_Known_x_RangeQuery

the select statement, as text, used to determine the known y- and x-values to be used in the PEARSON_q calculation.

Return Type

float

Remarks

If the number of known-x data points is different than the number of known-y data points, PEARSON_q will return an error.

If the standard deviation of either set of data points is zero, then PEARSON_q will return an error.

No GROUP BY is required for this function even though it produces aggregated results.

Examples

To determine the Pearson coefficient between grades in math and grades in science:

CREATE TABLE #s1 (
    [Student] [nvarchar](50) NOT NULL,
    [subject] [nvarchar](50) NOT NULL,
    [grade] [float] NULL);
INSERT INTO #S1
VALUES ('Student 01', 'Math', 97.19);
INSERT INTO #S1
VALUES ('Student 02', 'Math', 94.07);
INSERT INTO #S1
VALUES ('Student 03', 'Math', 92.78);
INSERT INTO #S1
VALUES ('Student 04', 'Math', 91.86);
INSERT INTO #S1
VALUES ('Student 05', 'Math', 91.12);
INSERT INTO #S1
VALUES ('Student 06', 'Math', 90.48);
INSERT INTO #S1
VALUES ('Student 07', 'Math', 89.9);
INSERT INTO #S1
VALUES ('Student 08', 'Math', 89.36);
INSERT INTO #S1
VALUES ('Student 09', 'Math', 88.86);
INSERT INTO #S1
VALUES ('Student 10', 'Math', 88.37);
INSERT INTO #S1
VALUES ('Student 11', 'Math', 87.89);
INSERT INTO #S1
VALUES ('Student 12', 'Math', 87.42);
INSERT INTO #S1
VALUES ('Student 13', 'Math', 86.95);
INSERT INTO #S1
VALUES ('Student 14', 'Math', 86.47);
INSERT INTO #S1
VALUES ('Student 15', 'Math', 85.98);
INSERT INTO #S1
VALUES ('Student 16', 'Math', 85.46);
INSERT INTO #S1
VALUES ('Student 17', 'Math', 84.92);
INSERT INTO #S1
VALUES ('Student 18', 'Math', 84.32);
INSERT INTO #S1
VALUES ('Student 19', 'Math', 83.65);
INSERT INTO #S1
VALUES ('Student 20', 'Math', 82.86);
INSERT INTO #S1
VALUES ('Student 21', 'Math', 81.85);
INSERT INTO #S1
VALUES ('Student 22', 'Math', 80.32);
INSERT INTO #S1
VALUES ('Student 01', 'Science', 97.64);
INSERT INTO #S1
VALUES ('Student 02', 'Science', 95.02);
INSERT INTO #S1
VALUES ('Student 03', 'Science', 93.94);
INSERT INTO #S1
VALUES ('Student 04', 'Science', 93.17);
INSERT INTO #S1
VALUES ('Student 05', 'Science', 92.54);
INSERT INTO #S1
VALUES ('Student 06', 'Science', 92);
INSERT INTO #S1
VALUES ('Student 07', 'Science', 91.51);
INSERT INTO #S1
VALUES ('Student 08', 'Science', 91.06);
INSERT INTO #S1
VALUES ('Student 09', 'Science', 90.64);
INSERT INTO #S1
VALUES ('Student 10', 'Science', 90.23);
INSERT INTO #S1
VALUES ('Student 11', 'Science', 89.83);
INSERT INTO #S1
VALUES ('Student 12', 'Science', 89.43);
INSERT INTO #S1
VALUES ('Student 13', 'Science', 89.04);
INSERT INTO #S1
VALUES ('Student 14', 'Science', 88.64);
INSERT INTO #S1
VALUES ('Student 15', 'Science', 88.22);
INSERT INTO #S1
VALUES ('Student 16', 'Science', 87.79);
INSERT INTO #S1
VALUES ('Student 17', 'Science', 87.33);
INSERT INTO #S1
VALUES ('Student 18', 'Science', 86.83);
INSERT INTO #S1
VALUES ('Student 19', 'Science', 86.26);
INSERT INTO #S1
VALUES ('Student 20', 'Science', 85.6);
INSERT INTO #S1
VALUES ('Student 21', 'Science', 84.75);
INSERT INTO #S1
VALUES ('Student 22', 'Science', 83.47);
INSERT INTO #S1
VALUES ('Student 01', 'Foreign Language', 96.52);
INSERT INTO #S1
VALUES ('Student 02', 'Foreign Language', 92.64);
INSERT INTO #S1
VALUES ('Student 03', 'Foreign Language', 91.05);
INSERT INTO #S1
VALUES ('Student 04', 'Foreign Language', 89.91);
INSERT INTO #S1
VALUES ('Student 05', 'Foreign Language', 88.99);
INSERT INTO #S1
VALUES ('Student 06', 'Foreign Language', 88.19);
INSERT INTO #S1
VALUES ('Student 07', 'Foreign Language', 87.47);
INSERT INTO #S1
VALUES ('Student 08', 'Foreign Language', 86.81);
INSERT INTO #S1
VALUES ('Student 09', 'Foreign Language', 86.18);
INSERT INTO #S1
VALUES ('Student 10', 'Foreign Language', 85.58);
INSERT INTO #S1
VALUES ('Student 11', 'Foreign Language', 84.99);
INSERT INTO #S1
VALUES ('Student 12', 'Foreign Language', 84.4);
INSERT INTO #S1
VALUES ('Student 13', 'Foreign Language', 83.82);
INSERT INTO #S1
VALUES ('Student 14', 'Foreign Language', 83.22);
INSERT INTO #S1
VALUES ('Student 15', 'Foreign Language', 82.61);
INSERT INTO #S1
VALUES ('Student 16', 'Foreign Language', 81.97);
INSERT INTO #S1
VALUES ('Student 17', 'Foreign Language', 81.29);
INSERT INTO #S1
VALUES ('Student 18', 'Foreign Language', 80.55);
INSERT INTO #S1
VALUES ('Student 19', 'Foreign Language', 79.72);
INSERT INTO #S1
VALUES ('Student 20', 'Foreign Language', 78.74);
INSERT INTO #S1
VALUES ('Student 21', 'Foreign Language', 77.49);
INSERT INTO #S1
VALUES ('Student 22', 'Foreign Language', 75.6);
INSERT INTO #S1
VALUES ('Student 01', 'History', 98.09);
INSERT INTO #S1
VALUES ('Student 02', 'History', 95.97);
INSERT INTO #S1
VALUES ('Student 03', 'History', 95.09);
INSERT INTO #S1
VALUES ('Student 04', 'History', 94.47);
INSERT INTO #S1
VALUES ('Student 05', 'History', 93.96);
INSERT INTO #S1
VALUES ('Student 06', 'History', 93.52);
INSERT INTO #S1
VALUES ('Student 07', 'History', 93.13);
INSERT INTO #S1
VALUES ('Student 08', 'History', 92.77);
INSERT INTO #S1
VALUES ('Student 09', 'History', 92.42);
INSERT INTO #S1
VALUES ('Student 10', 'History', 92.09);
INSERT INTO #S1
VALUES ('Student 11', 'History', 91.77);
INSERT INTO #S1
VALUES ('Student 12', 'History', 91.45);
INSERT INTO #S1
VALUES ('Student 13', 'History', 91.13);
INSERT INTO #S1
VALUES ('Student 14', 'History', 90.8);
INSERT INTO #S1
VALUES ('Student 15', 'History', 90.47);
INSERT INTO #S1
VALUES ('Student 16', 'History', 90.12);
INSERT INTO #S1
VALUES ('Student 17', 'History', 89.74);
INSERT INTO #S1
VALUES ('Student 18', 'History', 89.34);
INSERT INTO #S1
VALUES ('Student 19', 'History', 88.88);
INSERT INTO #S1
VALUES ('Student 20', 'History', 88.34);
INSERT INTO #S1
VALUES ('Student 21', 'History', 87.66);
INSERT INTO #S1
VALUES ('Student 22', 'History', 86.62);
INSERT INTO #S1
VALUES ('Student 01', 'Literature', 98.09);
INSERT INTO #S1
VALUES ('Student 02', 'Literature', 95.97);
INSERT INTO #S1
VALUES ('Student 03', 'Literature', 95.09);
INSERT INTO #S1
VALUES ('Student 04', 'Literature', 94.47);
INSERT INTO #S1
VALUES ('Student 05', 'Literature', 93.96);
INSERT INTO #S1
VALUES ('Student 06', 'Literature', 93.52);
INSERT INTO #S1
VALUES ('Student 07', 'Literature', 93.13);
INSERT INTO #S1
VALUES ('Student 08', 'Literature', 92.77);
INSERT INTO #S1
VALUES ('Student 09', 'Literature', 92.42);
INSERT INTO #S1
VALUES ('Student 10', 'Literature', 92.09);
INSERT INTO #S1
VALUES ('Student 11', 'Literature', 91.77);
INSERT INTO #S1
VALUES ('Student 12', 'Literature', 91.45);
INSERT INTO #S1
VALUES ('Student 13', 'Literature', 91.13);
INSERT INTO #S1
VALUES ('Student 14', 'Literature', 90.8);
INSERT INTO #S1
VALUES ('Student 15', 'Literature', 90.47);
INSERT INTO #S1
VALUES ('Student 16', 'Literature', 90.12);
INSERT INTO #S1
VALUES ('Student 17', 'Literature', 89.74);
INSERT INTO #S1
VALUES ('Student 18', 'Literature', 89.34);
INSERT INTO #S1
VALUES ('Student 19', 'Literature', 88.88);
INSERT INTO #S1
VALUES ('Student 20', 'Literature', 88.34);
INSERT INTO #S1
VALUES ('Student 21', 'Literature', 87.66);
INSERT INTO #S1
VALUES ('Student 22', 'Literature', 86.62);

select wct.PEARSON_q('Select a.grade, b.grade
      from #s1 a, #s1 b
      where a.subject = ' + Char(39) + 'Math' + Char(39) + ' and
      b.subject = ' + Char(39) + 'Science' + Char(39) + 'and
      a.student = b.student');

This produces the following result.

{"columns":[{"field":"column 1","headerClass":"ag-right-aligned-header","cellClass":"ag-right-aligned-cell"}],"rows":[{"column 1":"0.999999494275124"}]}

To determine the correlation between grades in math and grades in science on a de-normalized table:

CREATE TABLE #s2 (
    [student] [nvarchar](50) NOT NULL,
    [math] [float] NULL,
    [science] [float] NULL,
    [f_language] [float] NULL,
    [history] [float] NULL,
    [literature] [float] NULL);
insert into #s2
values ('Student 01', 97.19, 97.64, 96.52, 98.09, 98.09);
insert into #s2
values ('Student 02', 94.07, 95.02, 92.64, 95.97, 95.97);
insert into #s2
values ('Student 03', 92.78, 93.94, 91.05, 95.09, 95.09);
insert into #s2
values ('Student 04', 91.86, 93.17, 89.91, 94.47, 94.47);
insert into #s2
values ('Student 05', 91.12, 92.54, 88.99, 93.96, 93.96);
insert into #s2
values ('Student 06', 90.48, 92, 88.19, 93.52, 93.52);
insert into #s2
values ('Student 07', 89.9, 91.51, 87.47, 93.13, 93.13);
insert into #s2
values ('Student 08', 89.36, 91.06, 86.81, 92.77, 92.77);
insert into #s2
values ('Student 09', 88.86, 90.64, 86.18, 92.42, 92.42);
insert into #s2
values ('Student 10', 88.37, 90.23, 85.58, 92.09, 92.09);
insert into #s2
values ('Student 11', 87.89, 89.83, 84.99, 91.77, 91.77);
insert into #s2
values ('Student 12', 87.42, 89.43, 84.4, 91.45, 91.45);
insert into #s2
values ('Student 13', 86.95, 89.04, 83.82, 91.13, 91.13);
insert into #s2
values ('Student 14', 86.47, 88.64, 83.22, 90.8, 90.8);
insert into #s2
values ('Student 15', 85.98, 88.22, 82.61, 90.47, 90.47);
insert into #s2
values ('Student 16', 85.46, 87.79, 81.97, 90.12, 90.12);
insert into #s2
values ('Student 17', 84.92, 87.33, 81.29, 89.74, 89.74);
insert into #s2
values ('Student 18', 84.32, 86.83, 80.55, 89.34, 89.34);
insert into #s2
values ('Student 19', 83.65, 86.26, 79.72, 88.88, 88.88);
insert into #s2
values ('Student 20', 82.86, 85.6, 78.74, 88.34, 88.34);
insert into #s2
values ('Student 21', 81.85, 84.75, 77.49, 87.66, 87.66);
insert into #s2
values ('Student 22', 80.32, 83.47, 75.6, 86.62, 86.62);

SELECT wct.PEARSON_q('SELECT math, science from #s2');

This produces the following result.

{"columns":[{"field":"column 1","headerClass":"ag-right-aligned-header","cellClass":"ag-right-aligned-cell"}],"rows":[{"column 1":"0.999999494275124"}]}