공부 메모 중


[환경]

 - centos 6.4

 - java 7

 - hadoop-2.7.3

 - tajo-0.11.3


[구성]

server1 : NameNode, TajoMaster

server2 : DataNode, TajoWorker (SecondaryNameNode)

server3 : DataNode, TajoWorker

server4 : DataNode, TajoWorker

1
2
3
4
5
6
# Cluster configuration is omitted here.
 
# Format the NameNode. When re-initializing, run `rm -Rf /tmp/hadoop-tomcat`
# first and then format.
bin/hdfs namenode -format
# The start/stop scripts live under sbin/ (not bin/) in the Hadoop 2.x layout.
sbin/start-all.sh
 
접속: http://192.168.0.100:50070/dfshealth.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
<p>-- 데이터 : {"reg_dt":1474350438172,"jsessionid":"c09db86d-22c1-464b-92af-d612d7274c66","url_now":"http://ddakker.pe.kr/b","click_page_url":"http://ddakker.pe.kr/a","ip":"192.168.0.100","user_key":"1234567890","url_before":"http://ddakker.pe.kr/a","session_first_time":1474289056726,"user_agent":"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko"}
 
bin/tsql
 
default> \dfs -mkdir /tajo
 
default> \dfs -ls /
Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2016-09-20 15:27 /tajo
 
default> \dfs -put /logs/weblog /tajo/weblog
 
default> \dfs -du -s -h /tajo/weblog
1.5 G  /tajo/weblog
 
-- External Tajo table over the JSON web-log files uploaded to /tajo/weblog.
-- reg_dt and session_first_time are stored as INT8; in the sample record they
-- look like epoch milliseconds (TODO confirm).
-- The sample record also carries a user_agent field, which is not mapped here.
CREATE EXTERNAL TABLE weblog (
    reg_dt             INT8,
    url_before         TEXT,
    jsessionid         TEXT,
    ip                 TEXT,
    url_now            TEXT,
    click_page_url     TEXT,
    user_key           TEXT,
    session_first_time INT8
)
USING JSON
LOCATION 'hdfs:/tajo/weblog';
 
default> select count(*) from weblog;
Progress: 0%, response time: 0.465 sec
Progress: 0%, response time: 0.467 sec
Progress: 0%, response time: 0.869 sec
Progress: 0%, response time: 1.67 sec
Progress: 46%, response time: 2.672 sec
Progress: 100%, response time: 2.735 sec
?count
-------------------------------
3503135
(1 rows, 2.735 sec, 16 B selected)
</p><p></p>



공부 메모 중...


[환경]

 - centos 6.4

 - java 7

 - spark-2.0.0-bin-hadoop2.7

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Download the JSON SerDe jar into jars/ (needed for JSON-format tables).
# The download runs in a subshell so the working directory stays put and the
# relative sbin/ and bin/ paths below still resolve
# (presumably this is run from the Spark install directory - confirm).
(cd jars && wget http://www.congiu.net/hive-json-serde/1.3/cdh5/json-serde-1.3-jar-with-dependencies.jar)
 
# Start the Thrift server, then connect to it with beeline over JDBC.
sbin/start-thriftserver.sh
bin/beeline -u jdbc:hive2://localhost:10000
 
# Append two pipe-delimited sample rows (id|name|score|type) to test.csv.
printf '%s\n' '1|abc|1.1|a' '2|def|2.3|b' >> test.csv
 
-- Text table whose fields are separated by '|', matching the rows in test.csv.
CREATE TABLE IF NOT EXISTS testCsv (
    id    INT,
    name  STRING,
    score FLOAT,
    type  STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|';

-- Load the local sample file into the table.
LOAD DATA LOCAL INPATH '/usr/local/tomcat/test.csv' INTO TABLE testCsv;
 
0: jdbc:hive2://localhost:10000> select * from testCsv;
+-----+-------+--------------------+-------+--+
| id  | name  |       score        | type  |
+-----+-------+--------------------+-------+--+
| 1   | abc   | 1.100000023841858  | a     |
| 2   | def   | 2.299999952316284  | b     |
+-----+-------+--------------------+-------+--+
 
 
# Append two sample records to test.json, one JSON object per line.
# Written as strict JSON (double-quoted keys and strings) so the file does not
# depend on a lenient parser; the values are unchanged.
echo '{"id": 1, "name": "abc", "score": 1.1, "type": "a"}' >> test.json
echo '{"id": 2, "name": "def", "score": 2.2, "type": "b"}' >> test.json
 
-- Table whose rows are parsed by the openx JSON SerDe
-- (the SerDe class must be on the server's classpath).
CREATE TABLE IF NOT EXISTS testJson (
    id    INT,
    name  STRING,
    score FLOAT,
    type  STRING
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';

-- Load the local sample file into the table.
LOAD DATA LOCAL INPATH '/usr/local/tomcat/test.json' INTO TABLE testJson;
 
0: jdbc:hive2://localhost:10000> select * from testJson;
+-----+-------+--------------------+-------+--+
| id  | name  |       score        | type  |
+-----+-------+--------------------+-------+--+
| 1   | abc   | 1.100000023841858  | a     |
| 2   | def   | 2.200000047683716  | b     |
+-----+-------+--------------------+-------+--+


+ Recent posts