CHJBootCamp2018/DAY2Drill の履歴ソース(No.1)

履歴一覧
差分を表示
現在との差分を表示
履歴を表示
CHJBootCamp2018/DAY2Drill へ行く。
- 1 (2018-08-23 (木) 13:22:27)
- 2 (2018-08-23 (木) 14:42:45)
[[CHJBootCamp2018/DAY2]]

**演習1

 -- Exercise 1: raw frequency of each lemma matching '御%有る', with a
 -- per-genre breakdown taken from the bibliography table.
 -- The per-genre columns use sum(): count() counts every non-NULL value,
 -- including the 0 from the ELSE branch, so it would just repeat 粗頻度.
 -- NOTE(review): the table name was garbled as an HTML entity in the page
 -- source; restored as 長単位 -- confirm against the database.
 select
     L.語彙素,
     L.語彙素読み,
     count(*) as 粗頻度,
     sum(case when B.ジャンル = 'キリシタン資料' then 1 else 0 end) as キリシタン資料,
     sum(case when B.ジャンル = '狂言' then 1 else 0 end) as 狂言資料
 from 長単位 as L
 inner join 書誌情報 as B
     on L.サンプルID = B.サンプルID
 where L.語彙素 like N'御%有る'
 group by L.語彙素, L.語彙素読み
 order by count(L.語彙素) desc


**演習2
 -- Exercise 2: for each lemma with part of speech '感動詞-一般' in the
 -- '江戸' subcorpus, count tokens whose original text matches the
 -- [ァ-ヴ] character range versus those that do not, plus the total.
 -- NOTE(review): which characters fall inside [ァ-ヴ] depends on the
 -- column collation -- confirm it covers exactly the intended katakana.
 select
     語彙素,
     語彙素読み,
     sum(case when 原文文字列 like '%[ァ-ヴ]%' then 1 else 0 end) as 原文カタカナ,
     sum(case when 原文文字列 not like '%[ァ-ヴ]%' then 1 else 0 end) as 原文カタカナ以外,
     count(*) as 粗頻度
 from 短単位
 where サブコーパス名 like '江戸'
     and 品詞 = '感動詞-一般'
 group by 語彙素, 語彙素読み
 order by 語彙素読み


**演習3
 -- Exercise 3: frequency of adjectives whose reading is longer than three
 -- characters, broken down by subcorpus (period).
 -- The per-period columns use sum(): count() counts every non-NULL value,
 -- including the 0 from the ELSE branch, so every column would equal 粗頻度.
 select
     語彙素,
     語彙素読み,
     品詞,
     語種,
     count(*) as 粗頻度,
     sum(case when サブコーパス名 = '奈良' then 1 else 0 end) as 奈良,
     sum(case when サブコーパス名 = '平安' then 1 else 0 end) as 平安,
     sum(case when サブコーパス名 = '鎌倉' then 1 else 0 end) as 鎌倉,
     sum(case when サブコーパス名 = '室町' then 1 else 0 end) as 室町,
     sum(case when サブコーパス名 = '江戸' then 1 else 0 end) as 江戸,
     -- Bracketed because the middle dot may not be accepted in a regular
     -- (undelimited) identifier.
     sum(case when サブコーパス名 = '明治・大正' then 1 else 0 end) as [明治・大正]
 from 短単位
 where 品詞 like '形容詞%'
     and len(語彙素読み) > 3
 group by 語彙素, 語彙素読み, 品詞, 語種
 order by 語彙素読み


**演習4
 -- Exercise 4: particles (s1) paired with a token of the lemma '恐れる' (s2)
 -- in the same sample, shown with 20 characters of context on each side.
 -- The join requires s2.連番 to be exactly s1.連番 + 10.
 -- NOTE(review): presumably 連番 advances in steps of 10, making s2 the
 -- token immediately after s1 -- confirm against the table.
 use chunagon_chj
 select
     s1.サブコーパス名,
     s1.語彙素,
     s1.語彙素読み,
     s1.品詞,
     dbo.fn前文脈(s1.サンプルID, s1.出現書字形開始位置, 20) as 前文脈,
     s1.キー,
     dbo.fn後文脈(s1.サンプルID, s1.出現書字形開始位置, 20) as 後文脈
 from 短単位 as s1
 inner join 短単位 as s2
     on s1.サンプルID = s2.サンプルID
     and s1.連番 + 10 = s2.連番
 where s1.品詞 like '%助詞%'
     and s2.語彙素 = '恐れる'  -- straight quotes: curly quotes are not string delimiters
 order by s1.品詞, s1.語彙素


**演習5
 -- Exercise 5: nouns (s1) paired with an attributive-form token of the
 -- lemma '良い' (s2) in the same sample, shown with 20 characters of
 -- context on each side.
 -- The join requires s1.連番 to be exactly s2.連番 + 10.
 -- NOTE(review): presumably 連番 advances in steps of 10, making s1 the
 -- token immediately after s2 -- confirm against the table.
 select
     s1.語彙素,
     s1.語彙素読み,
     s1.品詞,
     dbo.fn前文脈(s1.サンプルID, s1.出現書字形開始位置, 20) as 前文脈,
     s1.キー,
     dbo.fn後文脈(s1.サンプルID, s1.出現書字形開始位置, 20) as 後文脈
 from 短単位 as s1
 inner join 短単位 as s2
     on s1.サンプルID = s2.サンプルID
     and s1.連番 = s2.連番 + 10
 where s2.語彙素 = '良い'
     and s2.活用形 like '連体形%'
     and s1.品詞 like '名詞%'
 order by s1.品詞, s1.語彙素


**演習6
 -- Exercise 6: frequency of three-token sequences in the '平安' subcorpus
 -- in which all three tokens are auxiliary verbs (品詞 like '助動詞%').
 -- Each join links a token to the one whose 連番 is exactly 10 higher in
 -- the same sample.
 -- NOTE(review): presumably 連番 advances in steps of 10, so s1, s2, s3 are
 -- consecutive tokens -- confirm against the table.
 use chunagon_chj
 select
     s1.キー + s2.キー + s3.キー,
     s1.語彙素 + '/' + s2.語彙素 + '/' + s3.語彙素,
     count(*) as 頻度
 from 短単位 as s1
 inner join 短単位 as s2
     on s1.サンプルID = s2.サンプルID
     and s1.連番 + 10 = s2.連番
 inner join 短単位 as s3
     on s2.サンプルID = s3.サンプルID
     and s2.連番 + 10 = s3.連番
 where s1.サブコーパス名 = '平安'
     and s1.品詞 like '助動詞%'
     and s2.品詞 like '助動詞%'
     and s3.品詞 like '助動詞%'
 group by
     s1.キー + s2.キー + s3.キー,
     s1.語彙素 + '/' + s2.語彙素 + '/' + s3.語彙素
 order by count(*) desc


**演習7
 -- Exercise 7: for part of speech '感動詞-一般' in the '江戸' subcorpus,
 -- the number of tokens whose original text matches the [ァ-ヴ] range,
 -- the total count, and the ratio of the two.
 -- The LIKE pattern is kept on a single line: a line break inside the
 -- string literal would become part of the pattern.
 select
     品詞,
     sum(case when 原文文字列 like '%[ァ-ヴ]%' then 1 else 0 end) as 原文カタカナ,
     count(*) as 粗頻度,
     -- Converted to float first so the division is not integer division.
     convert(float, sum(case when 原文文字列 like '%[ァ-ヴ]%' then 1 else 0 end)) / count(*)
 from 短単位
 where サブコーパス名 like '江戸'
     and 品詞 = '感動詞-一般'
 group by 品詞


**演習8
 -- Exercise 8: list the distinct (lemma, word form, orthographic form)
 -- combinations for the lemma '為さる' with reading 'ナサル', by joining
 -- the orthographic-form, word-form and lemma tables on their ID columns.
 use unidicSQL
 select
     語彙素,
     語形,
     書字形
 from 短単位書字形 as O
 inner join 短単位語形 as F
     on F.語形ID = O.語形ID
 inner join 短単位語彙素 as L
     on L.語彙素ID = F.語彙素ID
 where 語彙素 like '為さる'  -- both quotes straight: a curly closing quote leaves the literal unterminated
     and 語彙素読み like 'ナサル'
 group by 語彙素, 語形, 書字形
 order by 語形


**演習9
 -- Exercise 9: per-subcorpus frequency of lemmas matching '秋%' with
 -- reading 'アキ%', joining the lemma table in unidicSQL to the
 -- short-unit table in the chunagon_chj database on 語彙素ID.
 use unidicSQL
 select
     L.語彙素,
     L.語彙素読み,
     S.サブコーパス名,
     count(S.キー) as 粗頻度
 from 短単位語彙素 as L
 inner join chunagon_chj.dbo.短単位 as S
     on L.語彙素ID = S.語彙素ID
 where L.語彙素 like '秋%'
     and L.語彙素読み like 'アキ%'
 group by L.語彙素, L.語彙素読み, S.サブコーパス名
 order by S.サブコーパス名