Here is abc.txt, a small text file whose lines are AAA, BBB, CCC, DDD, and EEE.

I will demonstrate reading this file and reversing its lines.

with open('abc.txt', 'r') as f:
    lines = f.readlines()
    print(lines)                # ['AAA\n', 'BBB\n', 'CCC\n', 'DDD\n', 'EEE']

lines.reverse()
print(lines)                    # ['EEE', 'DDD\n', 'CCC\n', 'BBB\n', 'AAA\n']

with open('result.txt', 'w') as f:
    for line in lines:
        line = line.strip()             # remove '\n' (the last line has none)
        f.write(line + '\n')            # write each line followed by a single newline
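
The same result can be written a bit more compactly; this is just a sketch under the same assumption about abc.txt (only the last line has no trailing newline). splitlines() drops the newline characters, so the reversed lines only need to be re-joined.

with open('abc.txt', 'r') as f:
    lines = f.read().splitlines()               # ['AAA', 'BBB', 'CCC', 'DDD', 'EEE'] - no '\n' characters

with open('result.txt', 'w') as f:
    f.write('\n'.join(reversed(lines)) + '\n')  # EEE, DDD, CCC, BBB, AAA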

To work with CSV files, you first need to install the pandas package (in the PyCharm settings under Python Interpreter, or with pip from a terminal).


DataFrames

CSV files

To work with DataFrames and CSV files, you use the pandas module. The example below builds a DataFrame from a Python list and saves it to a CSV file; reading a CSV back gives you a DataFrame as well (see the sketch after the code).

import pandas as pd

data = [[1,2,3,4],[5,6,7,8]]        

# Create a DataFrame from the list
df = pd.DataFrame(data)
print(df)
#    0  1  2  3            column number
# 0  1  2  3  4            index number : 0
# 1  5  6  7  8            index number : 1

# dataframe -> csv file (Save)
df.to_csv('../data/df.csv', header=False, index=False)
print('saved successfully')
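
Reading the file back works the same way in reverse. This is a sketch that assumes the df.csv saved above; header=None is needed because the file was written without a header row.

import pandas as pd

df2 = pd.read_csv('../data/df.csv', header=None)   # no header row in the saved file
print(df2)
#    0  1  2  3
# 0  1  2  3  4
# 1  5  6  7  8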

Excel files

To work with Excel files, you also need pandas. The example uses an Excel file of statistics, stats_104102.xlsx.

Install openpyxl as well (pandas uses it as its engine for .xlsx files); the contents can then be printed to the console.

import pandas as pd

# open excel file
book = pd.read_excel('../data/stats_104102.xlsx',
                     sheet_name='stats_104102',
                     header=1)          # use the second row of the sheet as the column header
print(book)

book = book.sort_values(by=2015, ascending=False)   # sort by the 2015 column in descending order
print(book)
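
The sorted DataFrame can also be saved back to an Excel file with to_excel(). This is a sketch and the output path is just an example (openpyxl is used as the engine for .xlsx files).

# Save the sorted data to a new Excel file (example path)
book.to_excel('../data/stats_104102_sorted.xlsx', index=False)
print('saved successfully')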

XML files

To parse XML files, you need the bs4 (BeautifulSoup) library.

First, download the document from the URL and save it to a file with the .xml extension.


from bs4 import BeautifulSoup           # module for analyzing html, xml files
import urllib.request as req            # download
import os.path

url='http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=108'

savename = 'forecast.xml'

# The existence check is left commented out so the latest forecast is always re-downloaded
# if not os.path.exists(savename):
req.urlretrieve(url, savename)          # download and save as forecast.xml

# Analyze with BeautifulSoup module
with open(savename, 'r', encoding='utf-8') as f:
    xml = f.read()
soup = BeautifulSoup(xml, 'html.parser')    # html.parser handles this XML too; tag names are lowercased
# print(soup)

# Store the nationwide weather information in the info dictionary
info = {}               # info = { name : weather }
for location in soup.find_all('location'):
    name = location.find('city').text           # city name
    wf = location.find('wf').text               # weather forecast text
    tmx = location.find('tmx').text             # maximum temperature
    tmn = location.find('tmn').text             # minimum temperature

    weather = wf + ':' + tmn + '~' + tmx

    if name not in info:
        info[name] = []
    info[name].append(weather)

print(info)

# Print the collected forecasts
for name in info.keys():
    print('+', name)              
    for weather in info[name]:
        print('|', weather)
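
One caution, added here as a sketch rather than part of the original code: find() returns None when a tag is missing, and calling .text on None raises an AttributeError. If the feed ever omits tmx or tmn for a location, a small guard keeps the loop running (it reuses the soup object from above).

info = {}
for location in soup.find_all('location'):
    city = location.find('city')
    wf = location.find('wf')
    tmx = location.find('tmx')
    tmn = location.find('tmn')
    if city is None or wf is None or tmx is None or tmn is None:
        continue                                    # skip incomplete entries
    weather = wf.text + ':' + tmn.text + '~' + tmx.text
    info.setdefault(city.text, []).append(weather)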

To open a text file, compute the total and average of the numbers in it, and save the result to a new text file:

with open('sample.txt', 'r') as f:
    lines = f.readlines()
    print(lines)                 # ['70\n', '60\n', '55\n', '75\n', '95\n', '90\n', '80\n', '80\n', '85\n', '100']

total = 0
for line in lines:
    total += int(line)
avg = total / len(lines)
print('total:', total)           # total: 790
print('avg:', avg)               # avg: 79.0

with open('result.txt', 'w') as f:
    f.write(str(avg))
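
The summing loop can also be written with the built-in sum(); a sketch, assuming the same sample.txt as above. int() tolerates the trailing '\n', so no strip() is needed.

total = sum(int(line) for line in lines)    # int('70\n') == 70
avg = total / len(lines)
print('total:', total)                      # total: 790
print('avg:', avg)                          # avg: 79.0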

To read the public repository list from GitHub (using the GitHub API):


import urllib.request as req
import os.path
import json

# To download json file
url = 'https://api.github.com/repositories'
savename = 'repo.json'

if not os.path.exists(savename):       
    req.urlretrieve(url, savename)      

# To read repo.json
items = json.load(open(savename, 'r', encoding='utf-8'))
print(type(items))                      # <class 'list'>
print(items)

# To print out
for item in items:
    print(item['name']+'-'+item['owner']['login'])

This is the data returned by the URL: a JSON list of repository objects, each with fields such as name and owner.
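
The same data can also be loaded without saving it to disk first; a sketch using only the standard library (urlopen returns the response, and json.loads parses the decoded text):

import json
import urllib.request as req

url = 'https://api.github.com/repositories'
with req.urlopen(url) as res:
    items = json.loads(res.read().decode('utf-8'))

for item in items:
    print(item['name'] + '-' + item['owner']['login'])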

wordcount.py

To count the words in a file and sort them in descending order of frequency:

def getTextFreq(filename):
    with open(filename, 'r', encoding='utf-8') as f:
        text = f.read()                 # whole file as a single string
        tmp = text.split()              # split into words on whitespace

        fa = {}                         # frequency dictionary: word -> count
        for c in tmp:
            if c in fa:                 # word already counted
                fa[c] += 1
            else:                       # first occurrence
                fa[c] = 1

    return fa

result = getTextFreq('../data/data.txt')
# result = getTextFreq('../data/alice.txt')
# result = getTextFreq('../data/hong.txt')
print(type(result))              # <class 'dict'>
print(result)

# Ascending by word (key)
print(sorted(result.items()))
print(sorted(result.items(), key=lambda x: x[0]))

# Descending by word (key)
print(sorted(result.items(), key=lambda x: x[0], reverse=True))

# Descending by frequency (10, 9, 8, ...)
result = sorted(result.items(), key=lambda x : x[1], reverse=True)
print(result)

for c, freq in result:
    print('[%s] - [%d] time(s)' % (c, freq))
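
The standard library's collections.Counter does the same counting and sorting in one step; a sketch that assumes the same data.txt path used above:

from collections import Counter

with open('../data/data.txt', 'r', encoding='utf-8') as f:
    counter = Counter(f.read().split())     # word -> count

# most_common() is already sorted in descending order of frequency
for word, freq in counter.most_common(10):  # top 10 words
    print('[%s] - [%d] time(s)' % (word, freq))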

To count how many times a word entered by the user appears in a file:


def countWord(filename, word):
    with open(filename, 'r') as f:
        text = f.read()
        text = text.lower()             # lowercase for case-insensitive matching

        words = text.split()            # 'words' avoids shadowing the built-in list
        count = words.count(word)       # number of exact matches

    return count

word = input('Which word do you want to search? ')
word = word.lower()

# result = countWord('../data/data.txt', word)
result = countWord('../data/alice.txt', word)
print('[%s]: %d time(s)'%(word, result))
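
Note that split() keeps punctuation attached to words, so 'alice,' would not match 'alice'. Below is a sketch (the countWordClean name is made up for illustration) that strips leading and trailing punctuation from each token before counting:

import string

def countWordClean(filename, word):
    with open(filename, 'r', encoding='utf-8') as f:
        words = f.read().lower().split()
    # strip leading/trailing punctuation such as ',' '.' '!' from every token
    words = [w.strip(string.punctuation) for w in words]
    return words.count(word)

result = countWordClean('../data/alice.txt', word)
print('[%s]: %d time(s)' % (word, result))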
